Completed
Push — development ( 29b82e...e01cc9 )
by Ashutosh
10:05
created

HTML5_TreeBuilder   F

Complexity

Total Complexity 796

Size/Duplication

Total Lines 3948
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 796
eloc 1827
dl 0
loc 3948
rs 0.8
c 0
b 0
f 0

How to fix   Complexity   

Complex Class

Complex classes like HTML5_TreeBuilder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use HTML5_TreeBuilder, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/*
4
5
Copyright 2007 Jeroen van der Meer <http://jero.net/>
6
Copyright 2009 Edward Z. Yang <[email protected]>
7
8
Permission is hereby granted, free of charge, to any person obtaining a
9
copy of this software and associated documentation files (the
10
"Software"), to deal in the Software without restriction, including
11
without limitation the rights to use, copy, modify, merge, publish,
12
distribute, sublicense, and/or sell copies of the Software, and to
13
permit persons to whom the Software is furnished to do so, subject to
14
the following conditions:
15
16
The above copyright notice and this permission notice shall be included
17
in all copies or substantial portions of the Software.
18
19
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27
*/
28
29
// Tags for FIX ME!!!: (in order of priority)
30
//      XXX - should be fixed NAO!
31
//      XERROR - with regards to parse errors
32
//      XSCRIPT - with regards to scripting mode
33
//      XENCODING - with regards to encoding (for reparsing tests)
34
//      XDOM - DOM specific code (tagName is explicitly not marked).
35
//          this is not (yet) in helper functions.
36
37
class HTML5_TreeBuilder {
38
    public $stack = array();
39
    public $content_model;
40
41
    private $mode;
42
    private $original_mode;
43
    private $secondary_mode;
44
    private $dom;
45
    // Whether or not normal insertion of nodes should actually foster
46
    // parent (used in one case in spec)
47
    private $foster_parent = false;
48
    private $a_formatting  = array();
49
50
    private $head_pointer = null;
51
    private $form_pointer = null;
52
53
    private $flag_frameset_ok = true;
54
    private $flag_force_quirks = false;
55
    private $ignored = false;
56
    private $quirks_mode = null;
57
    // this gets to 2 when we want to ignore the next lf character, and
58
    // is decremented at the beginning of each processed token (this way,
59
    // code can check for (bool)$ignore_lf_token, but it phases out
60
    // appropriately)
61
    private $ignore_lf_token = 0;
62
    private $fragment = false;
63
    private $root;
64
65
    private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
66
    private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
67
    // dl and ds are speculative
68
    private $special = array('address','area','article','aside','base','basefont','bgsound',
69
    'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
70
    'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
71
    'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
72
    'listing','menu','meta','nav','noembed','noframes','noscript','ol',
73
    'p','param','plaintext','pre','script','select','spacer','style',
74
    'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
75
76
    private $pendingTableCharacters;
77
    private $pendingTableCharactersDirty;
78
79
    // Tree construction modes
80
    const INITIAL           = 0;
81
    const BEFORE_HTML       = 1;
82
    const BEFORE_HEAD       = 2;
83
    const IN_HEAD           = 3;
84
    const IN_HEAD_NOSCRIPT  = 4;
85
    const AFTER_HEAD        = 5;
86
    const IN_BODY           = 6;
87
    const IN_CDATA_RCDATA   = 7;
88
    const IN_TABLE          = 8;
89
    const IN_TABLE_TEXT     = 9;
90
    const IN_CAPTION        = 10;
91
    const IN_COLUMN_GROUP   = 11;
92
    const IN_TABLE_BODY     = 12;
93
    const IN_ROW            = 13;
94
    const IN_CELL           = 14;
95
    const IN_SELECT         = 15;
96
    const IN_SELECT_IN_TABLE= 16;
97
    const IN_FOREIGN_CONTENT= 17;
98
    const AFTER_BODY        = 18;
99
    const IN_FRAMESET       = 19;
100
    const AFTER_FRAMESET    = 20;
101
    const AFTER_AFTER_BODY  = 21;
102
    const AFTER_AFTER_FRAMESET = 22;
103
104
    /**
     * Converts a magic number to a readable name. Use for debugging.
     *
     * The lookup table is built once, by reflecting over this class's
     * constants, and cached in a static variable. Several constant groups
     * reuse the same integer values (e.g. INITIAL and SCOPE are both 0),
     * so the first constant found for a value wins; the tree construction
     * modes are declared first, which keeps their names from being
     * shadowed by later constants such as the SCOPE_* group.
     *
     * @param int $number Constant value to look up (typically an insertion mode).
     * @return string|null Name of the first constant holding that value, or
     *                     null when no integer constant matches.
     */
    private function strConst($number) {
        static $lookup;
        if ($lookup === null) {
            $lookup = array();
            $r = new ReflectionClass(__CLASS__);
            foreach ($r->getConstants() as $const => $num) {
                // Skip non-integer constants (namespace URIs, NS_HTML) and
                // values that already have a name, so earlier constants win.
                if (!is_int($num) || isset($lookup[$num])) {
                    continue;
                }
                $lookup[$num] = $const;
            }
        }
        // Unknown values yield null without raising an undefined-offset notice.
        return isset($lookup[$number]) ? $lookup[$number] : null;
    }
122
123
    // The different types of elements.
124
    const SPECIAL    = 100;
125
    const SCOPING    = 101;
126
    const FORMATTING = 102;
127
    const PHRASING   = 103;
128
129
    // Quirks modes in $quirks_mode
130
    const NO_QUIRKS             = 200;
131
    const QUIRKS_MODE           = 201;
132
    const LIMITED_QUIRKS_MODE   = 202;
133
134
    // Marker to be placed in $a_formatting
135
    const MARKER     = 300;
136
137
    // Namespaces for foreign content
138
    const NS_HTML   = null; // to prevent DOM from requiring NS on everything
139
    const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
140
    const NS_SVG    = 'http://www.w3.org/2000/svg';
141
    const NS_XLINK  = 'http://www.w3.org/1999/xlink';
142
    const NS_XML    = 'http://www.w3.org/XML/1998/namespace';
143
    const NS_XMLNS  = 'http://www.w3.org/2000/xmlns/';
144
145
    // Different types of scopes to test for elements
146
    const SCOPE = 0;
147
    const SCOPE_LISTITEM = 1;
148
    const SCOPE_TABLE = 2;
149
150
    /**
     * Sets up a fresh tree builder: the insertion mode starts at INITIAL
     * and a new, empty DOMDocument is created for the builder to append
     * nodes to.
     */
    public function __construct() {
        // Configure the document on a local handle before storing it.
        $document = new DOMDocument();
        $document->encoding = 'UTF-8';
        $document->preserveWhiteSpace = true;
        $document->substituteEntities = true;
        $document->strictErrorChecking = false;

        $this->dom  = $document;
        $this->mode = self::INITIAL;
    }
162
163
    /**
     * Returns the quirks mode recorded by the tree builder.
     *
     * @return int|null The stored value: in the code visible here it is
     *                  assigned one of the NO_QUIRKS, QUIRKS_MODE or
     *                  LIMITED_QUIRKS_MODE constants, and it is null
     *                  until such an assignment has happened.
     */
    public function getQuirksMode() {
        $quirksMode = $this->quirks_mode;
        return $quirksMode;
    }
166
167
    /**
168
     * Process tag tokens
169
     *
170
     * @param $token
171
     * @param null $mode
172
     */
173
    public function emitToken($token, $mode = null) {
174
        // XXX: ignore parse errors... why are we emitting them, again?
175
        if ($token['type'] === HTML5_Tokenizer::PARSEERROR) {
176
            return;
177
        }
178
        if ($mode === null) {
179
            $mode = $this->mode;
180
        }
181
182
        /*
183
        $backtrace = debug_backtrace();
184
        if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
185
        echo $this->strConst($mode);
186
        if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
187
        echo "\n  ";
188
        token_dump($token);
189
        $this->printStack();
190
        $this->printActiveFormattingElements();
191
        if ($this->foster_parent) echo "  -> this is a foster parent mode\n";
192
        if ($this->flag_frameset_ok) echo "  -> frameset ok\n";
193
        */
194
195
        if ($this->ignore_lf_token) {
196
            $this->ignore_lf_token--;
197
        }
198
        $this->ignored = false;
199
200
        switch ($mode) {
201
            case self::INITIAL:
202
203
                /* A character token that is one of U+0009 CHARACTER TABULATION,
204
                 * U+000A LINE FEED (LF), U+000C FORM FEED (FF),  or U+0020 SPACE */
205
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
206
                    /* Ignore the token. */
207
                    $this->ignored = true;
208
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
209
                    if (
210
                        $token['name'] !== 'html' || !empty($token['public']) ||
211
                        !empty($token['system']) || $token !== 'about:legacy-compat'
212
                    ) {
213
                        /* If the DOCTYPE token's name is not a case-sensitive match
214
                         * for the string "html", or if the token's public identifier
215
                         * is not missing, or if the token's system identifier is
216
                         * neither missing nor a case-sensitive match for the string
217
                         * "about:legacy-compat", then there is a parse error (this
218
                         * is the DOCTYPE parse error). */
219
                        // DOCTYPE parse error
220
                    }
221
                    /* Append a DocumentType node to the Document node, with the name
222
                     * attribute set to the name given in the DOCTYPE token, or the
223
                     * empty string if the name was missing; the publicId attribute
224
                     * set to the public identifier given in the DOCTYPE token, or
225
                     * the empty string if the public identifier was missing; the
226
                     * systemId attribute set to the system identifier given in the
227
                     * DOCTYPE token, or the empty string if the system identifier
228
                     * was missing; and the other attributes specific to
229
                     * DocumentType objects set to null and empty lists as
230
                     * appropriate. Associate the DocumentType node with the
231
                     * Document object so that it is returned as the value of the
232
                     * doctype attribute of the Document object. */
233
                    if (!isset($token['public'])) {
234
                        $token['public'] = null;
235
                    }
236
                    if (!isset($token['system'])) {
237
                        $token['system'] = null;
238
                    }
239
                    // XDOM
240
                    // Yes this is hacky. I'm kind of annoyed that I can't appendChild
241
                    // a doctype to DOMDocument. Maybe I haven't chanted the right
242
                    // syllables.
243
                    $impl = new DOMImplementation();
244
                    // This call can fail for particularly pathological cases (namely,
245
                    // the qualifiedName parameter ($token['name']) could be missing.
246
                    if ($token['name']) {
247
                        $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
248
                        $this->dom->appendChild($doctype);
249
                    } else {
250
                        // It looks like libxml's not actually *able* to express this case.
251
                        // So... don't.
252
                        $this->dom->emptyDoctype = true;
253
                    }
254
                    $public = is_null($token['public']) ? false : strtolower($token['public']);
255
                    $system = is_null($token['system']) ? false : strtolower($token['system']);
256
                    $publicStartsWithForQuirks = array(
257
                     "+//silmaril//dtd html pro v0r11 19970101//",
258
                     "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
259
                     "-//as//dtd html 3.0 aswedit + extensions//",
260
                     "-//ietf//dtd html 2.0 level 1//",
261
                     "-//ietf//dtd html 2.0 level 2//",
262
                     "-//ietf//dtd html 2.0 strict level 1//",
263
                     "-//ietf//dtd html 2.0 strict level 2//",
264
                     "-//ietf//dtd html 2.0 strict//",
265
                     "-//ietf//dtd html 2.0//",
266
                     "-//ietf//dtd html 2.1e//",
267
                     "-//ietf//dtd html 3.0//",
268
                     "-//ietf//dtd html 3.2 final//",
269
                     "-//ietf//dtd html 3.2//",
270
                     "-//ietf//dtd html 3//",
271
                     "-//ietf//dtd html level 0//",
272
                     "-//ietf//dtd html level 1//",
273
                     "-//ietf//dtd html level 2//",
274
                     "-//ietf//dtd html level 3//",
275
                     "-//ietf//dtd html strict level 0//",
276
                     "-//ietf//dtd html strict level 1//",
277
                     "-//ietf//dtd html strict level 2//",
278
                     "-//ietf//dtd html strict level 3//",
279
                     "-//ietf//dtd html strict//",
280
                     "-//ietf//dtd html//",
281
                     "-//metrius//dtd metrius presentational//",
282
                     "-//microsoft//dtd internet explorer 2.0 html strict//",
283
                     "-//microsoft//dtd internet explorer 2.0 html//",
284
                     "-//microsoft//dtd internet explorer 2.0 tables//",
285
                     "-//microsoft//dtd internet explorer 3.0 html strict//",
286
                     "-//microsoft//dtd internet explorer 3.0 html//",
287
                     "-//microsoft//dtd internet explorer 3.0 tables//",
288
                     "-//netscape comm. corp.//dtd html//",
289
                     "-//netscape comm. corp.//dtd strict html//",
290
                     "-//o'reilly and associates//dtd html 2.0//",
291
                     "-//o'reilly and associates//dtd html extended 1.0//",
292
                     "-//o'reilly and associates//dtd html extended relaxed 1.0//",
293
                     "-//spyglass//dtd html 2.0 extended//",
294
                     "-//sq//dtd html 2.0 hotmetal + extensions//",
295
                     "-//sun microsystems corp.//dtd hotjava html//",
296
                     "-//sun microsystems corp.//dtd hotjava strict html//",
297
                     "-//w3c//dtd html 3 1995-03-24//",
298
                     "-//w3c//dtd html 3.2 draft//",
299
                     "-//w3c//dtd html 3.2 final//",
300
                     "-//w3c//dtd html 3.2//",
301
                     "-//w3c//dtd html 3.2s draft//",
302
                     "-//w3c//dtd html 4.0 frameset//",
303
                     "-//w3c//dtd html 4.0 transitional//",
304
                     "-//w3c//dtd html experimental 19960712//",
305
                     "-//w3c//dtd html experimental 970421//",
306
                     "-//w3c//dtd w3 html//",
307
                     "-//w3o//dtd w3 html 3.0//",
308
                     "-//webtechs//dtd mozilla html 2.0//",
309
                     "-//webtechs//dtd mozilla html//",
310
                    );
311
                    $publicSetToForQuirks = array(
312
                     "-//w3o//dtd w3 html strict 3.0//",
313
                     "-/w3c/dtd html 4.0 transitional/en",
314
                     "html",
315
                    );
316
                    $publicStartsWithAndSystemForQuirks = array(
317
                     "-//w3c//dtd html 4.01 frameset//",
318
                     "-//w3c//dtd html 4.01 transitional//",
319
                    );
320
                    $publicStartsWithForLimitedQuirks = array(
321
                     "-//w3c//dtd xhtml 1.0 frameset//",
322
                     "-//w3c//dtd xhtml 1.0 transitional//",
323
                    );
324
                    $publicStartsWithAndSystemForLimitedQuirks = array(
325
                     "-//w3c//dtd html 4.01 frameset//",
326
                     "-//w3c//dtd html 4.01 transitional//",
327
                    );
328
                    // first, do easy checks
329
                    if (
330
                        !empty($token['force-quirks']) ||
331
                        strtolower($token['name']) !== 'html'
332
                    ) {
333
                        $this->quirks_mode = self::QUIRKS_MODE;
334
                    } else {
335
                        do {
336
                            if ($system) {
337
                                foreach ($publicStartsWithAndSystemForQuirks as $x) {
338
                                    if (strncmp($public, $x, strlen($x)) === 0) {
339
                                        $this->quirks_mode = self::QUIRKS_MODE;
340
                                        break;
341
                                    }
342
                                }
343
                                if (!is_null($this->quirks_mode)) {
344
                                    break;
345
                                }
346
                                foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
347
                                    if (strncmp($public, $x, strlen($x)) === 0) {
348
                                        $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
349
                                        break;
350
                                    }
351
                                }
352
                                if (!is_null($this->quirks_mode)) {
353
                                    break;
354
                                }
355
                            }
356
                            foreach ($publicSetToForQuirks as $x) {
357
                                if ($public === $x) {
358
                                    $this->quirks_mode = self::QUIRKS_MODE;
359
                                    break;
360
                                }
361
                            }
362
                            if (!is_null($this->quirks_mode)) {
363
                                break;
364
                            }
365
                            foreach ($publicStartsWithForLimitedQuirks as $x) {
366
                                if (strncmp($public, $x, strlen($x)) === 0) {
367
                                    $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
368
                                }
369
                            }
370
                            if (!is_null($this->quirks_mode)) {
371
                                break;
372
                            }
373
                            if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
374
                                $this->quirks_mode = self::QUIRKS_MODE;
375
                                break;
376
                            }
377
                            foreach ($publicStartsWithForQuirks as $x) {
378
                                if (strncmp($public, $x, strlen($x)) === 0) {
379
                                    $this->quirks_mode = self::QUIRKS_MODE;
380
                                    break;
381
                                }
382
                            }
383
                            if (is_null($this->quirks_mode)) {
384
                                $this->quirks_mode = self::NO_QUIRKS;
385
                            }
386
                        } while (false);
387
                    }
388
                    $this->mode = self::BEFORE_HTML;
389
                } else {
390
                    // parse error
391
                    /* Switch the insertion mode to "before html", then reprocess the
392
                     * current token. */
393
                    $this->mode = self::BEFORE_HTML;
394
                    $this->quirks_mode = self::QUIRKS_MODE;
395
                    $this->emitToken($token);
396
                }
397
                break;
398
399
            case self::BEFORE_HTML:
400
                /* A DOCTYPE token */
401
                if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
402
                    // Parse error. Ignore the token.
403
                    $this->ignored = true;
404
405
                /* A comment token */
406
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
407
                    /* Append a Comment node to the Document object with the data
408
                    attribute set to the data given in the comment token. */
409
                    // XDOM
410
                    $comment = $this->dom->createComment($token['data']);
411
                    $this->dom->appendChild($comment);
412
413
                /* A character token that is one of U+0009 CHARACTER TABULATION,
414
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
415
                or U+0020 SPACE */
416
                } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
417
                    /* Ignore the token. */
418
                    $this->ignored = true;
419
420
                /* A start tag whose tag name is "html" */
421
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
422
                    /* Create an element for the token in the HTML namespace. Append it
423
                     * to the Document  object. Put this element in the stack of open
424
                     * elements. */
425
                    // XDOM
426
                    $html = $this->insertElement($token, false);
427
                    $this->dom->appendChild($html);
428
                    $this->stack[] = $html;
429
430
                    $this->mode = self::BEFORE_HEAD;
431
432
                } else {
433
                    /* Create an html element. Append it to the Document object. Put
434
                     * this element in the stack of open elements. */
435
                    // XDOM
436
                    $html = $this->dom->createElementNS(self::NS_HTML, 'html');
437
                    $this->dom->appendChild($html);
438
                    $this->stack[] = $html;
439
440
                    /* Switch the insertion mode to "before head", then reprocess the
441
                     * current token. */
442
                    $this->mode = self::BEFORE_HEAD;
443
                    $this->emitToken($token);
444
                }
445
                break;
446
447
            case self::BEFORE_HEAD:
448
                /* A character token that is one of U+0009 CHARACTER TABULATION,
449
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
450
                or U+0020 SPACE */
451
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
452
                    /* Ignore the token. */
453
                    $this->ignored = true;
454
455
                /* A comment token */
456
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
457
                    /* Append a Comment node to the current node with the data attribute
458
                    set to the data given in the comment token. */
459
                    $this->insertComment($token['data']);
460
461
                /* A DOCTYPE token */
462
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
463
                    /* Parse error. Ignore the token */
464
                    $this->ignored = true;
465
                    // parse error
466
467
                /* A start tag token with the tag name "html" */
468
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
469
                    /* Process the token using the rules for the "in body"
470
                     * insertion mode. */
471
                    $this->processWithRulesFor($token, self::IN_BODY);
472
473
                /* A start tag token with the tag name "head" */
474
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
475
                    /* Insert an HTML element for the token. */
476
                    $element = $this->insertElement($token);
477
478
                    /* Set the head element pointer to this new element node. */
479
                    $this->head_pointer = $element;
480
481
                    /* Change the insertion mode to "in head". */
482
                    $this->mode = self::IN_HEAD;
483
484
                /* An end tag whose tag name is one of: "head", "body", "html", "br" */
485
                } elseif (
486
                    $token['type'] === HTML5_Tokenizer::ENDTAG && (
487
                        $token['name'] === 'head' || $token['name'] === 'body' ||
488
                        $token['name'] === 'html' || $token['name'] === 'br'
489
                )) {
490
                    /* Act as if a start tag token with the tag name "head" and no
491
                     * attributes had been seen, then reprocess the current token. */
492
                    $this->emitToken(array(
493
                        'name' => 'head',
494
                        'type' => HTML5_Tokenizer::STARTTAG,
495
                        'attr' => array()
496
                    ));
497
                    $this->emitToken($token);
498
499
                /* Any other end tag */
500
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
501
                    /* Parse error. Ignore the token. */
502
                    $this->ignored = true;
503
504
                } else {
505
                    /* Act as if a start tag token with the tag name "head" and no
506
                     * attributes had been seen, then reprocess the current token.
507
                     * Note: This will result in an empty head element being
508
                     * generated, with the current token being reprocessed in the
509
                     * "after head" insertion mode. */
510
                    $this->emitToken(array(
511
                        'name' => 'head',
512
                        'type' => HTML5_Tokenizer::STARTTAG,
513
                        'attr' => array()
514
                    ));
515
                    $this->emitToken($token);
516
                }
517
                break;
518
519
            case self::IN_HEAD:
520
                /* A character token that is one of U+0009 CHARACTER TABULATION,
521
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
522
                or U+0020 SPACE. */
523
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
524
                    /* Insert the character into the current node. */
525
                    $this->insertText($token['data']);
526
527
                /* A comment token */
528
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
529
                    /* Append a Comment node to the current node with the data attribute
530
                    set to the data given in the comment token. */
531
                    $this->insertComment($token['data']);
532
533
                /* A DOCTYPE token */
534
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
535
                    /* Parse error. Ignore the token. */
536
                    $this->ignored = true;
537
                    // parse error
538
539
                /* A start tag whose tag name is "html" */
540
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
541
                $token['name'] === 'html') {
542
                    $this->processWithRulesFor($token, self::IN_BODY);
543
544
                /* A start tag whose tag name is one of: "base", "command", "link" */
545
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
546
                ($token['name'] === 'base' || $token['name'] === 'command' ||
547
                $token['name'] === 'link')) {
548
                    /* Insert an HTML element for the token. Immediately pop the
549
                     * current node off the stack of open elements. */
550
                    $this->insertElement($token);
551
                    array_pop($this->stack);
552
553
                    // YYY: Acknowledge the token's self-closing flag, if it is set.
554
555
                /* A start tag whose tag name is "meta" */
556
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
557
                    /* Insert an HTML element for the token. Immediately pop the
558
                     * current node off the stack of open elements. */
559
                    $this->insertElement($token);
560
                    array_pop($this->stack);
561
562
                    // XERROR: Acknowledge the token's self-closing flag, if it is set.
563
564
                    // XENCODING: If the element has a charset attribute, and its value is a
565
                    // supported encoding, and the confidence is currently tentative,
566
                    // then change the encoding to the encoding given by the value of
567
                    // the charset attribute.
568
                    //
569
                    // Otherwise, if the element has a content attribute, and applying
570
                    // the algorithm for extracting an encoding from a Content-Type to
571
                    // its value returns a supported encoding encoding, and the
572
                    // confidence is currently tentative, then change the encoding to
573
                    // the encoding encoding.
574
575
                /* A start tag with the tag name "title" */
576
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
577
                    $this->insertRCDATAElement($token);
578
579
                /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
580
                 * A start tag whose tag name is one of: "noframes", "style" */
581
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
582
                ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
583
                    // XSCRIPT: Scripting flag not respected
584
                    $this->insertCDATAElement($token);
585
586
                // XSCRIPT: Scripting flag disable not implemented
587
588
                /* A start tag with the tag name "script" */
589
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
590
                    /* 1. Create an element for the token in the HTML namespace. */
591
                    $node = $this->insertElement($token, false);
592
593
                    /* 2. Mark the element as being "parser-inserted" */
594
                    // Uhhh... XSCRIPT
595
596
                    /* 3. If the parser was originally created for the HTML
597
                     * fragment parsing algorithm, then mark the script element as
598
                     * "already executed". (fragment case) */
599
                    // ditto... XSCRIPT
600
601
                    /* 4. Append the new element to the current node  and push it onto
602
                     * the stack of open elements.  */
603
                    end($this->stack)->appendChild($node);
604
                    $this->stack[] = $node;
605
                    // I guess we could squash these together
606
607
                    /* 6. Let the original insertion mode be the current insertion mode. */
608
                    $this->original_mode = $this->mode;
609
                    /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
610
                    $this->mode = self::IN_CDATA_RCDATA;
611
                    /* 5. Switch the tokeniser's content model flag to the CDATA state. */
612
                    $this->content_model = HTML5_Tokenizer::CDATA;
613
614
                /* An end tag with the tag name "head" */
615
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
616
                    /* Pop the current node (which will be the head element) off the stack of open elements. */
617
                    array_pop($this->stack);
618
619
                    /* Change the insertion mode to "after head". */
620
                    $this->mode = self::AFTER_HEAD;
621
622
                // Slight logic inversion here to minimize duplication
623
                /* A start tag with the tag name "head". */
624
                /* An end tag whose tag name is not one of: "body", "html", "br" */
625
                } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
626
                ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
627
                $token['name'] !== 'body' && $token['name'] !== 'br')) {
628
                    // Parse error. Ignore the token.
629
                    $this->ignored = true;
630
631
                /* Anything else */
632
                } else {
633
                    /* Act as if an end tag token with the tag name "head" had been
634
                     * seen, and reprocess the current token. */
635
                    $this->emitToken(array(
636
                        'name' => 'head',
637
                        'type' => HTML5_Tokenizer::ENDTAG
638
                    ));
639
640
                    /* Then, reprocess the current token. */
641
                    $this->emitToken($token);
642
                }
643
                break;
644
645
            case self::IN_HEAD_NOSCRIPT:
646
                if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
647
                    // parse error
648
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
649
                    $this->processWithRulesFor($token, self::IN_BODY);
650
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
651
                    /* Pop the current node (which will be a noscript element) from the
652
                     * stack of open elements; the new current node will be a head
653
                     * element. */
654
                    array_pop($this->stack);
655
                    $this->mode = self::IN_HEAD;
656
                } elseif (
657
                    ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
658
                    ($token['type'] === HTML5_Tokenizer::COMMENT) ||
659
                    ($token['type'] === HTML5_Tokenizer::STARTTAG && (
660
                        $token['name'] === 'link' || $token['name'] === 'meta' ||
661
                        $token['name'] === 'noframes' || $token['name'] === 'style'))) {
662
                    $this->processWithRulesFor($token, self::IN_HEAD);
663
                // inverted logic
664
                } elseif (
665
                    ($token['type'] === HTML5_Tokenizer::STARTTAG && (
666
                        $token['name'] === 'head' || $token['name'] === 'noscript')) ||
667
                    ($token['type'] === HTML5_Tokenizer::ENDTAG &&
668
                        $token['name'] !== 'br')) {
669
                    // parse error
670
                } else {
671
                    // parse error
672
                    $this->emitToken(array(
673
                        'type' => HTML5_Tokenizer::ENDTAG,
674
                        'name' => 'noscript',
675
                    ));
676
                    $this->emitToken($token);
677
                }
678
                break;
679
680
            case self::AFTER_HEAD:
681
                /* Handle the token as follows: */
682
683
                /* A character token that is one of U+0009 CHARACTER TABULATION,
684
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
685
                or U+0020 SPACE */
686
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
687
                    /* Append the character to the current node. */
688
                    $this->insertText($token['data']);
689
690
                /* A comment token */
691
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
692
                    /* Append a Comment node to the current node with the data attribute
693
                    set to the data given in the comment token. */
694
                    $this->insertComment($token['data']);
695
696
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
697
                    // parse error
698
699
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
700
                    $this->processWithRulesFor($token, self::IN_BODY);
701
702
                /* A start tag token with the tag name "body" */
703
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
704
                    $this->insertElement($token);
705
706
                    /* Set the frameset-ok flag to "not ok". */
707
                    $this->flag_frameset_ok = false;
708
709
                    /* Change the insertion mode to "in body". */
710
                    $this->mode = self::IN_BODY;
711
712
                /* A start tag token with the tag name "frameset" */
713
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
714
                    /* Insert a frameset element for the token. */
715
                    $this->insertElement($token);
716
717
                    /* Change the insertion mode to "in frameset". */
718
                    $this->mode = self::IN_FRAMESET;
719
720
                /* A start tag token whose tag name is one of: "base", "link", "meta",
721
                "noframes", "script", "style", "title" */
722
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
723
                array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
724
                    // parse error
725
                    /* Push the node pointed to by the head element pointer onto the
726
                     * stack of open elements. */
727
                    $this->stack[] = $this->head_pointer;
728
                    $this->processWithRulesFor($token, self::IN_HEAD);
729
                    array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
730
731
                // inversion of specification
732
                } elseif (
733
                ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
734
                ($token['type'] === HTML5_Tokenizer::ENDTAG &&
735
                    $token['name'] !== 'body' && $token['name'] !== 'html' &&
736
                    $token['name'] !== 'br')) {
737
                    // parse error
738
739
                /* Anything else */
740
                } else {
741
                    $this->emitToken(array(
742
                        'name' => 'body',
743
                        'type' => HTML5_Tokenizer::STARTTAG,
744
                        'attr' => array()
745
                    ));
746
                    $this->flag_frameset_ok = true;
747
                    $this->emitToken($token);
748
                }
749
                break;
750
751
            case self::IN_BODY:
752
                /* Handle the token as follows: */
753
754
                switch($token['type']) {
755
                    /* A character token */
756
                    case HTML5_Tokenizer::CHARACTER:
757
                    case HTML5_Tokenizer::SPACECHARACTER:
758
                        /* Reconstruct the active formatting elements, if any. */
759
                        $this->reconstructActiveFormattingElements();
760
761
                        /* Append the token's character to the current node. */
762
                        $this->insertText($token['data']);
763
764
                        /* If the token is not one of U+0009 CHARACTER TABULATION,
765
                         * U+000A LINE FEED (LF), U+000C FORM FEED (FF),  or U+0020
766
                         * SPACE, then set the frameset-ok flag to "not ok". */
767
                        // i.e., if any of the characters is not whitespace
768
                        if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
769
                            $this->flag_frameset_ok = false;
770
                        }
771
                    break;
772
773
                    /* A comment token */
774
                    case HTML5_Tokenizer::COMMENT:
775
                        /* Append a Comment node to the current node with the data
776
                        attribute set to the data given in the comment token. */
777
                        $this->insertComment($token['data']);
778
                    break;
779
780
                    case HTML5_Tokenizer::DOCTYPE:
781
                        // parse error
782
                    break;
783
784
                    case HTML5_Tokenizer::EOF:
785
                        // parse error
786
                    break;
787
788
                    case HTML5_Tokenizer::STARTTAG:
789
                    switch($token['name']) {
790
                        case 'html':
791
                            // parse error
792
                            /* For each attribute on the token, check to see if the
793
                             * attribute is already present on the top element of the
794
                             * stack of open elements. If it is not, add the attribute
795
                             * and its corresponding value to that element. */
796
                            foreach($token['attr'] as $attr) {
797
                                if (!$this->stack[0]->hasAttribute($attr['name'])) {
798
                                    $this->stack[0]->setAttribute($attr['name'], $attr['value']);
799
                                }
800
                            }
801
                        break;
802
803
                        case 'base': case 'command': case 'link': case 'meta': case 'noframes':
804
                        case 'script': case 'style': case 'title':
805
                            /* Process the token as if the insertion mode had been "in
806
                            head". */
807
                            $this->processWithRulesFor($token, self::IN_HEAD);
808
                        break;
809
810
                        /* A start tag token with the tag name "body" */
811
                        case 'body':
812
                            /* Parse error. If the second element on the stack of open
813
                            elements is not a body element, or, if the stack of open
814
                            elements has only one node on it, then ignore the token.
815
                            (fragment case) */
816
                            if (count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
817
                                $this->ignored = true;
818
                                // Ignore
819
820
                            /* Otherwise, for each attribute on the token, check to see
821
                            if the attribute is already present on the body element (the
822
                            second element) on the stack of open elements. If it is not,
823
                            add the attribute and its corresponding value to that
824
                            element. */
825
                            } else {
826
                                foreach($token['attr'] as $attr) {
827
                                    if (!$this->stack[1]->hasAttribute($attr['name'])) {
828
                                        $this->stack[1]->setAttribute($attr['name'], $attr['value']);
829
                                    }
830
                                }
831
                            }
832
                        break;
833
834
                        case 'frameset':
835
                            // parse error
836
                            /* If the second element on the stack of open elements is
837
                             * not a body element, or, if the stack of open elements
838
                             * has only one node on it, then ignore the token.
839
                             * (fragment case) */
840
                            if (count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
841
                                $this->ignored = true;
842
                                // Ignore
843
                            } elseif (!$this->flag_frameset_ok) {
844
                                $this->ignored = true;
845
                                // Ignore
846
                            } else {
847
                                /* 1. Remove the second element on the stack of open
848
                                 * elements from its parent node, if it has one.  */
849
                                if ($this->stack[1]->parentNode) {
850
                                    $this->stack[1]->parentNode->removeChild($this->stack[1]);
851
                                }
852
853
                                /* 2. Pop all the nodes from the bottom of the stack of
854
                                 * open elements, from the current node up to the root
855
                                 * html element. */
856
                                array_splice($this->stack, 1);
857
858
                                $this->insertElement($token);
859
                                $this->mode = self::IN_FRAMESET;
860
                            }
861
                        break;
862
863
                        // in spec, there is a diversion here
864
865
                        case 'address': case 'article': case 'aside': case 'blockquote':
866
                        case 'center': case 'datagrid': case 'details': case 'dir':
867
                        case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
868
                        case 'header': case 'hgroup': case 'menu': case 'nav':
869
                        case 'ol': case 'p': case 'section': case 'ul':
870
                            /* If the stack of open elements has a p element in scope,
871
                            then act as if an end tag with the tag name p had been
872
                            seen. */
873
                            if ($this->elementInScope('p')) {
874
                                $this->emitToken(array(
875
                                    'name' => 'p',
876
                                    'type' => HTML5_Tokenizer::ENDTAG
877
                                ));
878
                            }
879
880
                            /* Insert an HTML element for the token. */
881
                            $this->insertElement($token);
882
                        break;
883
884
                        /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
885
                        "h5", "h6" */
886
                        case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
887
                            /* If the stack of open elements has a p  element in scope,
888
                            then act as if an end tag with the tag name p had been seen. */
889
                            if ($this->elementInScope('p')) {
890
                                $this->emitToken(array(
891
                                    'name' => 'p',
892
                                    'type' => HTML5_Tokenizer::ENDTAG
893
                                ));
894
                            }
895
896
                            /* If the current node is an element whose tag name is one
897
                             * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
898
                             * parse error; pop the current node off the stack of open
899
                             * elements. */
900
                            $peek = array_pop($this->stack);
901
                            if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
902
                                // parse error
903
                            } else {
904
                                $this->stack[] = $peek;
905
                            }
906
907
                            /* Insert an HTML element for the token. */
908
                            $this->insertElement($token);
909
                        break;
910
911
                        case 'pre': case 'listing':
912
                            /* If the stack of open elements has a p  element in scope,
913
                            then act as if an end tag with the tag name p had been seen. */
914
                            if ($this->elementInScope('p')) {
915
                                $this->emitToken(array(
916
                                    'name' => 'p',
917
                                    'type' => HTML5_Tokenizer::ENDTAG
918
                                ));
919
                            }
920
                            $this->insertElement($token);
921
                            /* If the next token is a U+000A LINE FEED (LF) character
922
                             * token, then ignore that token and move on to the next
923
                             * one. (Newlines at the start of pre blocks are ignored as
924
                             * an authoring convenience.) */
925
                            $this->ignore_lf_token = 2;
926
                            $this->flag_frameset_ok = false;
927
                        break;
928
929
                        /* A start tag whose tag name is "form" */
930
                        case 'form':
931
                            /* If the form element pointer is not null, ignore the
932
                            token with a parse error. */
933
                            if ($this->form_pointer !== null) {
934
                                $this->ignored = true;
935
                                // Ignore.
936
937
                            /* Otherwise: */
938
                            } else {
939
                                /* If the stack of open elements has a p element in
940
                                scope, then act as if an end tag with the tag name p
941
                                had been seen. */
942
                                if ($this->elementInScope('p')) {
943
                                    $this->emitToken(array(
944
                                        'name' => 'p',
945
                                        'type' => HTML5_Tokenizer::ENDTAG
946
                                    ));
947
                                }
948
949
                                /* Insert an HTML element for the token, and set the
950
                                form element pointer to point to the element created. */
951
                                $element = $this->insertElement($token);
952
                                $this->form_pointer = $element;
953
                            }
954
                        break;
955
956
                        // condensed specification
957
                        case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
958
                            /* 1. Set the frameset-ok flag to "not ok". */
959
                            $this->flag_frameset_ok = false;
960
961
                            $stack_length = count($this->stack) - 1;
962
                            for($n = $stack_length; 0 <= $n; $n--) {
963
                                /* 2. Initialise node to be the current node (the
964
                                bottommost node of the stack). */
965
                                $stop = false;
966
                                $node = $this->stack[$n];
967
                                $cat  = $this->getElementCategory($node);
968
969
                                // for case 'li':
970
                                /* 3. If node is an li element, then act as if an end
971
                                 * tag with the tag name "li" had been seen, then jump
972
                                 * to the last step.  */
973
                                // for case 'dc': case 'dd': case 'ds': case 'dt':
974
                                /* If node is a dc, dd, ds or dt element, then act as if an end
975
                                 * tag with the same tag name as node had been seen, then
976
                                 * jump to the last step. */
977
                                if (($token['name'] === 'li' && $node->tagName === 'li') ||
978
                                ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
979
                                    $this->emitToken(array(
980
                                        'type' => HTML5_Tokenizer::ENDTAG,
981
                                        'name' => $node->tagName,
982
                                    ));
983
                                    break;
984
                                }
985
986
                                /* 4. If node is not in the formatting category, and is
987
                                not in the phrasing category, and is not an address,
988
                                div or p element, then stop this algorithm. */
989
                                if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
990
                                $node->tagName !== 'address' && $node->tagName !== 'div' &&
991
                                $node->tagName !== 'p') {
992
                                    break;
993
                                }
994
995
                                /* 5. Otherwise, set node to the previous entry in the
996
                                 * stack of open elements and return to step 2. */
997
                            }
998
999
                            /* 6. This is the last step. */
1000
1001
                            /* If the stack of open elements has a p  element in scope,
1002
                            then act as if an end tag with the tag name p had been
1003
                            seen. */
1004
                            if ($this->elementInScope('p')) {
1005
                                $this->emitToken(array(
1006
                                    'name' => 'p',
1007
                                    'type' => HTML5_Tokenizer::ENDTAG
1008
                                ));
1009
                            }
1010
1011
                            /* Finally, insert an HTML element with the same tag
1012
                            name as the token's. */
1013
                            $this->insertElement($token);
1014
                        break;
1015
1016
                        /* A start tag token whose tag name is "plaintext" */
1017
                        case 'plaintext':
1018
                            /* If the stack of open elements has a p  element in scope,
1019
                            then act as if an end tag with the tag name p had been
1020
                            seen. */
1021
                            if ($this->elementInScope('p')) {
1022
                                $this->emitToken(array(
1023
                                    'name' => 'p',
1024
                                    'type' => HTML5_Tokenizer::ENDTAG
1025
                                ));
1026
                            }
1027
1028
                            /* Insert an HTML element for the token. */
1029
                            $this->insertElement($token);
1030
1031
                            $this->content_model = HTML5_Tokenizer::PLAINTEXT;
1032
                        break;
1033
1034
                        // more diversions
1035
1036
                        /* A start tag whose tag name is "a" */
1037
                        case 'a':
1038
                            /* If the list of active formatting elements contains
1039
                            an element whose tag name is "a" between the end of the
1040
                            list and the last marker on the list (or the start of
1041
                            the list if there is no marker on the list), then this
1042
                            is a parse error; act as if an end tag with the tag name
1043
                            "a" had been seen, then remove that element from the list
1044
                            of active formatting elements and the stack of open
1045
                            elements if the end tag didn't already remove it (it
1046
                            might not have if the element is not in table scope). */
1047
                            $leng = count($this->a_formatting);
1048
1049
                            for ($n = $leng - 1; $n >= 0; $n--) {
1050
                                if ($this->a_formatting[$n] === self::MARKER) {
1051
                                    break;
1052
1053
                                } elseif ($this->a_formatting[$n]->tagName === 'a') {
1054
                                    $a = $this->a_formatting[$n];
1055
                                    $this->emitToken(array(
1056
                                        'name' => 'a',
1057
                                        'type' => HTML5_Tokenizer::ENDTAG
1058
                                    ));
1059
                                    if (in_array($a, $this->a_formatting)) {
1060
                                        $a_i = array_search($a, $this->a_formatting, true);
1061
                                        if ($a_i !== false) {
1062
                                            array_splice($this->a_formatting, $a_i, 1);
1063
                                        }
1064
                                    }
1065
                                    if (in_array($a, $this->stack)) {
1066
                                        $a_i = array_search($a, $this->stack, true);
1067
                                        if ($a_i !== false) {
1068
                                            array_splice($this->stack, $a_i, 1);
1069
                                        }
1070
                                    }
1071
                                    break;
1072
                                }
1073
                            }
1074
1075
                            /* Reconstruct the active formatting elements, if any. */
1076
                            $this->reconstructActiveFormattingElements();
1077
1078
                            /* Insert an HTML element for the token. */
1079
                            $el = $this->insertElement($token);
1080
1081
                            /* Add that element to the list of active formatting
1082
                            elements. */
1083
                            $this->a_formatting[] = $el;
1084
                        break;
1085
1086
                        case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
1087
                        case 's': case 'small': case 'strike':
1088
                        case 'strong': case 'tt': case 'u':
1089
                            /* Reconstruct the active formatting elements, if any. */
1090
                            $this->reconstructActiveFormattingElements();
1091
1092
                            /* Insert an HTML element for the token. */
1093
                            $el = $this->insertElement($token);
1094
1095
                            /* Add that element to the list of active formatting
1096
                            elements. */
1097
                            $this->a_formatting[] = $el;
1098
                        break;
1099
1100
                        case 'nobr':
1101
                            /* Reconstruct the active formatting elements, if any. */
1102
                            $this->reconstructActiveFormattingElements();
1103
1104
                            /* If the stack of open elements has a nobr element in
1105
                             * scope, then this is a parse error; act as if an end tag
1106
                             * with the tag name "nobr" had been seen, then once again
1107
                             * reconstruct the active formatting elements, if any. */
1108
                            if ($this->elementInScope('nobr')) {
1109
                                $this->emitToken(array(
1110
                                    'name' => 'nobr',
1111
                                    'type' => HTML5_Tokenizer::ENDTAG,
1112
                                ));
1113
                                $this->reconstructActiveFormattingElements();
1114
                            }
1115
1116
                            /* Insert an HTML element for the token. */
1117
                            $el = $this->insertElement($token);
1118
1119
                            /* Add that element to the list of active formatting
1120
                            elements. */
1121
                            $this->a_formatting[] = $el;
1122
                        break;
1123
1124
                        // another diversion
1125
1126
                        /* A start tag token whose tag name is "button" */
1127
                        case 'button':
1128
                            /* If the stack of open elements has a button element in scope,
1129
                            then this is a parse error; act as if an end tag with the tag
1130
                            name "button" had been seen, then reprocess the token. (We don't
1131
                            do that. Unnecessary.) (I hope you're right! -- ezyang) */
1132
                            if ($this->elementInScope('button')) {
1133
                                $this->emitToken(array(
1134
                                    'name' => 'button',
1135
                                    'type' => HTML5_Tokenizer::ENDTAG
1136
                                ));
1137
                            }
1138
1139
                            /* Reconstruct the active formatting elements, if any. */
1140
                            $this->reconstructActiveFormattingElements();
1141
1142
                            /* Insert an HTML element for the token. */
1143
                            $this->insertElement($token);
1144
1145
                            /* Insert a marker at the end of the list of active
1146
                            formatting elements. */
1147
                            $this->a_formatting[] = self::MARKER;
1148
1149
                            $this->flag_frameset_ok = false;
1150
                        break;
1151
1152
                        case 'applet': case 'marquee': case 'object':
1153
                            /* Reconstruct the active formatting elements, if any. */
1154
                            $this->reconstructActiveFormattingElements();
1155
1156
                            /* Insert an HTML element for the token. */
1157
                            $this->insertElement($token);
1158
1159
                            /* Insert a marker at the end of the list of active
1160
                            formatting elements. */
1161
                            $this->a_formatting[] = self::MARKER;
1162
1163
                            $this->flag_frameset_ok = false;
1164
                        break;
1165
1166
                        // spec diversion
1167
1168
                        /* A start tag whose tag name is "table" */
1169
                        case 'table':
1170
                            /* If the Document is not set to quirks mode, and the
1171
                             * stack of open elements has a p element in scope, then
1172
                             * act as if an end tag with the tag name "p" had been
1173
                             * seen. */
1174
                            if ($this->quirks_mode !== self::QUIRKS_MODE &&
1175
                            $this->elementInScope('p')) {
1176
                                $this->emitToken(array(
1177
                                    'name' => 'p',
1178
                                    'type' => HTML5_Tokenizer::ENDTAG
1179
                                ));
1180
                            }
1181
1182
                            /* Insert an HTML element for the token. */
1183
                            $this->insertElement($token);
1184
1185
                            $this->flag_frameset_ok = false;
1186
1187
                            /* Change the insertion mode to "in table". */
1188
                            $this->mode = self::IN_TABLE;
1189
                        break;
1190
1191
                        /* A start tag whose tag name is one of: "area", "basefont",
1192
                        "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
1193
                        case 'area': case 'basefont': case 'bgsound': case 'br':
1194
                        case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
1195
                        case 'wbr':
1196
                            /* Reconstruct the active formatting elements, if any. */
1197
                            $this->reconstructActiveFormattingElements();
1198
1199
                            /* Insert an HTML element for the token. */
1200
                            $this->insertElement($token);
1201
1202
                            /* Immediately pop the current node off the stack of open elements. */
1203
                            array_pop($this->stack);
1204
1205
                            // YYY: Acknowledge the token's self-closing flag, if it is set.
1206
1207
                            $this->flag_frameset_ok = false;
1208
                        break;
1209
1210
                        case 'param': case 'source':
1211
                            /* Insert an HTML element for the token. */
1212
                            $this->insertElement($token);
1213
1214
                            /* Immediately pop the current node off the stack of open elements. */
1215
                            array_pop($this->stack);
1216
1217
                            // YYY: Acknowledge the token's self-closing flag, if it is set.
1218
                        break;
1219
1220
                        /* A start tag whose tag name is "hr" */
1221
                        case 'hr':
1222
                            /* If the stack of open elements has a p element in scope,
1223
                            then act as if an end tag with the tag name p had been seen. */
1224
                            if ($this->elementInScope('p')) {
1225
                                $this->emitToken(array(
1226
                                    'name' => 'p',
1227
                                    'type' => HTML5_Tokenizer::ENDTAG
1228
                                ));
1229
                            }
1230
1231
                            /* Insert an HTML element for the token. */
1232
                            $this->insertElement($token);
1233
1234
                            /* Immediately pop the current node off the stack of open elements. */
1235
                            array_pop($this->stack);
1236
1237
                            // YYY: Acknowledge the token's self-closing flag, if it is set.
1238
1239
                            $this->flag_frameset_ok = false;
1240
                        break;
1241
1242
                        /* A start tag whose tag name is "image" */
1243
                        case 'image':
1244
                            /* Parse error. Change the token's tag name to "img" and
1245
                            reprocess it. (Don't ask.) */
1246
                            $token['name'] = 'img';
1247
                            $this->emitToken($token);
1248
                        break;
1249
1250
                        /* A start tag whose tag name is "isindex" */
1251
                        case 'isindex':
1252
                            /* Parse error. */
1253
1254
                            /* If the form element pointer is not null,
1255
                            then ignore the token. */
1256
                            if ($this->form_pointer === null) {
1257
                                /* Act as if a start tag token with the tag name "form" had
1258
                                been seen. */
1259
                                /* If the token has an attribute called "action", set
1260
                                 * the action attribute on the resulting form
1261
                                 * element to the value of the "action" attribute of
1262
                                 * the token. */
1263
                                $attr = array();
1264
                                $action = $this->getAttr($token, 'action');
1265
                                if ($action !== false) {
1266
                                    $attr[] = array('name' => 'action', 'value' => $action);
1267
                                }
1268
                                $this->emitToken(array(
1269
                                    'name' => 'form',
1270
                                    'type' => HTML5_Tokenizer::STARTTAG,
1271
                                    'attr' => $attr
1272
                                ));
1273
1274
                                /* Act as if a start tag token with the tag name "hr" had
1275
                                been seen. */
1276
                                $this->emitToken(array(
1277
                                    'name' => 'hr',
1278
                                    'type' => HTML5_Tokenizer::STARTTAG,
1279
                                    'attr' => array()
1280
                                ));
1281
1282
                                /* Act as if a start tag token with the tag name "label"
1283
                                had been seen. */
1284
                                $this->emitToken(array(
1285
                                    'name' => 'label',
1286
                                    'type' => HTML5_Tokenizer::STARTTAG,
1287
                                    'attr' => array()
1288
                                ));
1289
1290
                                /* Act as if a stream of character tokens had been seen. */
1291
                                $prompt = $this->getAttr($token, 'prompt');
1292
                                if ($prompt === false) {
1293
                                    $prompt = 'This is a searchable index. '.
1294
                                    'Insert your search keywords here: ';
1295
                                }
1296
                                $this->emitToken(array(
1297
                                    'data' => $prompt,
1298
                                    'type' => HTML5_Tokenizer::CHARACTER,
1299
                                ));
1300
1301
                                /* Act as if a start tag token with the tag name "input"
1302
                                had been seen, with all the attributes from the "isindex"
1303
                                token, except with the "name" attribute set to the value
1304
                                "isindex" (ignoring any explicit "name" attribute). */
1305
                                $attr = array();
1306
                                foreach ($token['attr'] as $keypair) {
1307
                                    if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
1308
                                        $keypair['name'] === 'prompt') {
1309
                                        continue;
1310
                                    }
1311
                                    $attr[] = $keypair;
1312
                                }
1313
                                $attr[] = array('name' => 'name', 'value' => 'isindex');
1314
1315
                                $this->emitToken(array(
1316
                                    'name' => 'input',
1317
                                    'type' => HTML5_Tokenizer::STARTTAG,
1318
                                    'attr' => $attr
1319
                                ));
1320
1321
                                /* Act as if an end tag token with the tag name "label"
1322
                                had been seen. */
1323
                                $this->emitToken(array(
1324
                                    'name' => 'label',
1325
                                    'type' => HTML5_Tokenizer::ENDTAG
1326
                                ));
1327
1328
                                /* Act as if a start tag token with the tag name "hr" had
1329
                                been seen. */
1330
                                $this->emitToken(array(
1331
                                    'name' => 'hr',
1332
                                    'type' => HTML5_Tokenizer::STARTTAG
1333
                                ));
1334
1335
                                /* Act as if an end tag token with the tag name "form" had
1336
                                been seen. */
1337
                                $this->emitToken(array(
1338
                                    'name' => 'form',
1339
                                    'type' => HTML5_Tokenizer::ENDTAG
1340
                                ));
1341
                            } else {
1342
                                $this->ignored = true;
1343
                            }
1344
                        break;
1345
1346
                        /* A start tag whose tag name is "textarea" */
1347
                        case 'textarea':
1348
                            $this->insertElement($token);
1349
1350
                            /* If the next token is a U+000A LINE FEED (LF)
1351
                             * character token, then ignore that token and move on to
1352
                             * the next one. (Newlines at the start of textarea
1353
                             * elements are ignored as an authoring convenience.)
1354
                             * need flag, see also <pre> */
1355
                            $this->ignore_lf_token = 2;
1356
1357
                            $this->original_mode = $this->mode;
1358
                            $this->flag_frameset_ok = false;
1359
                            $this->mode = self::IN_CDATA_RCDATA;
1360
1361
                            /* Switch the tokeniser's content model flag to the
1362
                            RCDATA state. */
1363
                            $this->content_model = HTML5_Tokenizer::RCDATA;
1364
                        break;
1365
1366
                        /* A start tag token whose tag name is "xmp" */
1367
                        case 'xmp':
1368
                            /* If the stack of open elements has a p element in
1369
                            scope, then act as if an end tag with the tag name
1370
                            "p" has been seen. */
1371
                            if ($this->elementInScope('p')) {
1372
                                $this->emitToken(array(
1373
                                    'name' => 'p',
1374
                                    'type' => HTML5_Tokenizer::ENDTAG
1375
                                ));
1376
                            }
1377
1378
                            /* Reconstruct the active formatting elements, if any. */
1379
                            $this->reconstructActiveFormattingElements();
1380
1381
                            $this->flag_frameset_ok = false;
1382
1383
                            $this->insertCDATAElement($token);
1384
                        break;
1385
1386
                        case 'iframe':
1387
                            $this->flag_frameset_ok = false;
1388
                            $this->insertCDATAElement($token);
1389
                        break;
1390
1391
                        case 'noembed': case 'noscript':
1392
                            // XSCRIPT: should check scripting flag
1393
                            $this->insertCDATAElement($token);
1394
                        break;
1395
1396
                        /* A start tag whose tag name is "select" */
1397
                        case 'select':
1398
                            /* Reconstruct the active formatting elements, if any. */
1399
                            $this->reconstructActiveFormattingElements();
1400
1401
                            /* Insert an HTML element for the token. */
1402
                            $this->insertElement($token);
1403
1404
                            $this->flag_frameset_ok = false;
1405
1406
                            /* If the insertion mode is one of "in table", "in caption",
1407
                             * "in column group", "in table body", "in row", or "in
1408
                             * cell", then switch the insertion mode to "in select in
1409
                             * table". Otherwise, switch the insertion mode  to "in
1410
                             * select". */
1411
                            if (
1412
                                $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
1413
                                $this->mode === self::IN_COLUMN_GROUP || $this->mode === self::IN_TABLE_BODY ||
1414
                                $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
1415
                            ) {
1416
                                $this->mode = self::IN_SELECT_IN_TABLE;
1417
                            } else {
1418
                                $this->mode = self::IN_SELECT;
1419
                            }
1420
                        break;
1421
1422
                        case 'option': case 'optgroup':
1423
                            if ($this->elementInScope('option')) {
1424
                                $this->emitToken(array(
1425
                                    'name' => 'option',
1426
                                    'type' => HTML5_Tokenizer::ENDTAG,
1427
                                ));
1428
                            }
1429
                            $this->reconstructActiveFormattingElements();
1430
                            $this->insertElement($token);
1431
                        break;
1432
1433
                        case 'rp': case 'rt':
1434
                            /* If the stack of open elements has a ruby element in scope, then generate
1435
                             * implied end tags. If the current node is not then a ruby element, this is
1436
                             * a parse error; pop all the nodes from the current node up to the node
1437
                             * immediately before the bottommost ruby element on the stack of open elements.
1438
                             */
1439
                            /* Only unwind the stack when a ruby element is in scope.
                             * Unwinding unconditionally would drain the whole stack
                             * (and never terminate) when no ruby element is open. */
                            if ($this->elementInScope('ruby')) {
                                $this->generateImpliedEndTags();

                                /* Pop nodes until the current node is a ruby element,
                                 * leaving that ruby element on the stack. The emptiness
                                 * check is a defensive guard so the loop always ends. */
                                // XERROR: having to pop anything here is a parse error
                                while (
                                    !empty($this->stack) &&
                                    end($this->stack)->tagName !== 'ruby'
                                ) {
                                    array_pop($this->stack);
                                }
                            }
1450
                            $this->insertElement($token);
1451
                        break;
1452
1453
                        // spec diversion
1454
1455
                        case 'math':
1456
                            $this->reconstructActiveFormattingElements();
1457
                            $token = $this->adjustMathMLAttributes($token);
1458
                            $token = $this->adjustForeignAttributes($token);
1459
                            $this->insertForeignElement($token, self::NS_MATHML);
1460
                            if (isset($token['self-closing'])) {
1461
                                // XERROR: acknowledge the token's self-closing flag
1462
                                array_pop($this->stack);
1463
                            }
1464
                            if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1465
                                $this->secondary_mode = $this->mode;
1466
                                $this->mode = self::IN_FOREIGN_CONTENT;
1467
                            }
1468
                        break;
1469
1470
                        case 'svg':
1471
                            $this->reconstructActiveFormattingElements();
1472
                            $token = $this->adjustSVGAttributes($token);
1473
                            $token = $this->adjustForeignAttributes($token);
1474
                            $this->insertForeignElement($token, self::NS_SVG);
1475
                            if (isset($token['self-closing'])) {
1476
                                // XERROR: acknowledge the token's self-closing flag
1477
                                array_pop($this->stack);
1478
                            }
1479
                            if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1480
                                $this->secondary_mode = $this->mode;
1481
                                $this->mode = self::IN_FOREIGN_CONTENT;
1482
                            }
1483
                        break;
1484
1485
                        case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
1486
                        case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
1487
                            // parse error
1488
                        break;
1489
1490
                        /* A start tag token not covered by the previous entries */
1491
                        default:
1492
                            /* Reconstruct the active formatting elements, if any. */
1493
                            $this->reconstructActiveFormattingElements();
1494
1495
                            $this->insertElement($token);
1496
                            /* This element will be a phrasing  element. */
1497
                        break;
1498
                    }
1499
                    break;
1500
1501
                    case HTML5_Tokenizer::ENDTAG:
1502
                    switch ($token['name']) {
1503
                        /* An end tag with the tag name "body" */
1504
                        case 'body':
1505
                            /* If the stack of open elements does not have a body
1506
                             * element in scope, this is a parse error; ignore the
1507
                             * token. */
1508
                            if (!$this->elementInScope('body')) {
                                /* The token is ignored: flag it and leave the
                                 * insertion mode untouched. */
                                $this->ignored = true;
                            } else {
                                /* Otherwise, if there is a node in the stack of open
                                 * elements that is not either a dc element, a dd element,
                                 * a ds element, a dt element, an li element, an optgroup
                                 * element, an option element, a p element, an rp element,
                                 * an rt element, a tbody element, a td element, a tfoot
                                 * element, a th element, a thead element, a tr element,
                                 * the body element, or the html element, then this is a
                                 * parse error.
                                 */
                                // XERROR: implement this check for parse error

                                /* Change the insertion mode to "after body". This is
                                 * done only when the token was not ignored. */
                                $this->mode = self::AFTER_BODY;
                            }
1526
                        break;
1527
1528
                        /* An end tag with the tag name "html" */
1529
                        case 'html':
1530
                            /* Act as if an end tag with tag name "body" had been seen,
1531
                            then, if that token wasn't ignored, reprocess the current
1532
                            token. */
1533
                            $this->emitToken(array(
1534
                                'name' => 'body',
1535
                                'type' => HTML5_Tokenizer::ENDTAG
1536
                            ));
1537
1538
                            if (!$this->ignored) {
1539
                                $this->emitToken($token);
1540
                            }
1541
                        break;
1542
1543
                        case 'address': case 'article': case 'aside': case 'blockquote':
1544
                        case 'center': case 'datagrid': case 'details': case 'dir':
1545
                        case 'div': case 'dl': case 'fieldset': case 'footer':
1546
                        case 'header': case 'hgroup': case 'listing': case 'menu':
1547
                        case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
1548
                            /* If the stack of open elements has an element in scope
1549
                            with the same tag name as that of the token, then generate
1550
                            implied end tags. */
1551
                            if ($this->elementInScope($token['name'])) {
1552
                                $this->generateImpliedEndTags();
1553
1554
                                /* Now, if the current node is not an element with
1555
                                the same tag name as that of the token, then this
1556
                                is a parse error. */
1557
                                // XERROR: implement parse error logic
1558
1559
                                /* If the stack of open elements has an element in
1560
                                scope with the same tag name as that of the token,
1561
                                then pop elements from this stack until an element
1562
                                with that tag name has been popped from the stack. */
1563
                                do {
1564
                                    $node = array_pop($this->stack);
1565
                                } while ($node->tagName !== $token['name']);
1566
                            } else {
1567
                                // parse error
1568
                            }
1569
                        break;
1570
1571
                        /* An end tag whose tag name is "form" */
1572
                        case 'form':
1573
                            /* Let node be the element that the form element pointer is set to. */
1574
                            $node = $this->form_pointer;
1575
                            /* Set the form element pointer  to null. */
1576
                            $this->form_pointer = null;
1577
                            /* If node is null or the stack of open elements does not
1578
                                * have node in scope, then this is a parse error; ignore the token. */
1579
                            if ($node === null || !in_array($node, $this->stack, true)) {
1580
                                // parse error
1581
                                $this->ignored = true;
1582
                            } else {
1583
                                /* 1. Generate implied end tags. */
1584
                                $this->generateImpliedEndTags();
1585
                                /* 2. If the current node is not node, then this is a parse error.  */
1586
                                if (end($this->stack) !== $node) {
1587
                                    // parse error
1588
                                }
1589
                                /* 3. Remove node from the stack of open elements. */
1590
                                array_splice($this->stack, array_search($node, $this->stack, true), 1);
1591
                            }
1592
1593
                        break;
1594
1595
                        /* An end tag whose tag name is "p" */
1596
                        case 'p':
1597
                            /* If the stack of open elements has a p element in scope,
1598
                            then generate implied end tags, except for p elements. */
1599
                            if ($this->elementInScope('p')) {
1600
                                /* Generate implied end tags, except for elements with
1601
                                 * the same tag name as the token. */
1602
                                $this->generateImpliedEndTags(array('p'));
1603
1604
                                /* If the current node is not a p element, then this is
1605
                                a parse error. */
1606
                                // XERROR: implement
1607
1608
                                /* Pop elements from the stack of open elements  until
1609
                                 * an element with the same tag name as the token has
1610
                                 * been popped from the stack. */
1611
                                do {
1612
                                    $node = array_pop($this->stack);
1613
                                } while ($node->tagName !== 'p');
1614
1615
                            } else {
1616
                                // parse error
1617
                                $this->emitToken(array(
1618
                                    'name' => 'p',
1619
                                    'type' => HTML5_Tokenizer::STARTTAG,
1620
                                ));
1621
                                $this->emitToken($token);
1622
                            }
1623
                        break;
1624
1625
                        /* An end tag whose tag name is "li" */
1626
                        case 'li':
1627
                            /* If the stack of open elements does not have an element
1628
                             * in list item scope with the same tag name as that of the
1629
                             * token, then this is a parse error; ignore the token. */
1630
                            if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
1631
                                /* Generate implied end tags, except for elements with the
1632
                                 * same tag name as the token. */
1633
                                $this->generateImpliedEndTags(array($token['name']));
1634
                                /* If the current node is not an element with the same tag
1635
                                 * name as that of the token, then this is a parse error. */
1636
                                // XERROR: parse error
1637
                                /* Pop elements from the stack of open elements  until an
1638
                                 * element with the same tag name as the token has been
1639
                                 * popped from the stack. */
1640
                                do {
1641
                                    $node = array_pop($this->stack);
1642
                                } while ($node->tagName !== $token['name']);
1643
                            }
1644
                            /*else {
1645
                                // XERROR: parse error
1646
                            }*/
1647
                        break;
1648
1649
                        /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
1650
                        case 'dc': case 'dd': case 'ds': case 'dt':
1651
                            if ($this->elementInScope($token['name'])) {
1652
                                $this->generateImpliedEndTags(array($token['name']));
1653
1654
                                /* If the current node is not an element with the same
1655
                                tag name as the token, then this is a parse error. */
1656
                                // XERROR: implement parse error
1657
1658
                                /* Pop elements from the stack of open elements  until
1659
                                 * an element with the same tag name as the token has
1660
                                 * been popped from the stack. */
1661
                                do {
1662
                                    $node = array_pop($this->stack);
1663
                                } while ($node->tagName !== $token['name']);
1664
                            }
1665
                            /*else {
1666
                                // XERROR: parse error
1667
                            }*/
1668
                        break;
1669
1670
                        /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
1671
                        "h5", "h6" */
1672
                        case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1673
                            $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
1674
1675
                            /* If the stack of open elements has in scope an element whose
1676
                            tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1677
                            generate implied end tags. */
1678
                            if ($this->elementInScope($elements)) {
1679
                                $this->generateImpliedEndTags();
1680
1681
                                /* Now, if the current node is not an element with the same
1682
                                tag name as that of the token, then this is a parse error. */
1683
                                // XERROR: implement parse error
1684
1685
                                /* If the stack of open elements has in scope an element
1686
                                whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
1687
                                "h6", then pop elements from the stack until an element
1688
                                with one of those tag names has been popped from the stack. */
1689
                                do {
1690
                                    $node = array_pop($this->stack);
1691
                                } while (!in_array($node->tagName, $elements));
1692
                            }
1693
                            /*else {
1694
                                // parse error
1695
                            }*/
1696
                        break;
1697
1698
                        /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
1699
                        "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1700
                        case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
1701
                        case 'i': case 'nobr': case 's': case 'small': case 'strike':
1702
                        case 'strong': case 'tt': case 'u':
1703
                            // XERROR: generally speaking this needs parse error logic
1704
                            /* 1. Let the formatting element be the last element in
1705
                            the list of active formatting elements that:
1706
                                * is between the end of the list and the last scope
1707
                                marker in the list, if any, or the start of the list
1708
                                otherwise, and
1709
                                * has the same tag name as the token.
1710
                            */
1711
                            while (true) {
1712
                                for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
1713
                                    if ($this->a_formatting[$a] === self::MARKER) {
1714
                                        break;
1715
                                    } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
1716
                                        $formatting_element = $this->a_formatting[$a];
1717
                                        $in_stack = in_array($formatting_element, $this->stack, true);
1718
                                        $fe_af_pos = $a;
1719
                                        break;
1720
                                    }
1721
                                }
1722
1723
                                /* If there is no such node, or, if that node is
1724
                                also in the stack of open elements but the element
1725
                                is not in scope, then this is a parse error. Abort
1726
                                these steps. The token is ignored. */
1727
                                if (
1728
                                    !isset($formatting_element) || (
1729
                                        $in_stack &&
1730
                                        !$this->elementInScope($token['name'])
1731
                                    )
1732
                                ) {
1733
                                    $this->ignored = true;
1734
                                    break;
1735
1736
                                /* Otherwise, if there is such a node, but that node
1737
                                is not in the stack of open elements, then this is a
1738
                                parse error; remove the element from the list, and
1739
                                abort these steps. */
1740
                                } elseif (isset($formatting_element) && !$in_stack) {
1741
                                    unset($this->a_formatting[$fe_af_pos]);
1742
                                    $this->a_formatting = array_merge($this->a_formatting);
1743
                                    break;
1744
                                }
1745
1746
                                /* Otherwise, there is a formatting element and that
1747
                                 * element is in the stack and is in scope. If the
1748
                                 * element is not the current node, this is a parse
1749
                                 * error. In any case, proceed with the algorithm as
1750
                                 * written in the following steps. */
1751
                                // XERROR: implement me
1752
1753
                                /* 2. Let the furthest block be the topmost node in the
1754
                                stack of open elements that is lower in the stack
1755
                                than the formatting element, and is not an element in
1756
                                the phrasing or formatting categories. There might
1757
                                not be one. */
1758
                                $fe_s_pos = array_search($formatting_element, $this->stack, true);
1759
                                $length = count($this->stack);
1760
1761
                                for ($s = $fe_s_pos + 1; $s < $length; $s++) {
1762
                                    $category = $this->getElementCategory($this->stack[$s]);
1763
1764
                                    if ($category !== self::PHRASING && $category !== self::FORMATTING) {
1765
                                        $furthest_block = $this->stack[$s];
1766
                                        break;
1767
                                    }
1768
                                }
1769
1770
                                /* 3. If there is no furthest block, then the UA must
1771
                                skip the subsequent steps and instead just pop all
1772
                                the nodes from the bottom of the stack of open
1773
                                elements, from the current node up to the formatting
1774
                                element, and remove the formatting element from the
1775
                                list of active formatting elements. */
1776
                                if (!isset($furthest_block)) {
1777
                                    for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
1778
                                        array_pop($this->stack);
1779
                                    }
1780
1781
                                    unset($this->a_formatting[$fe_af_pos]);
1782
                                    $this->a_formatting = array_merge($this->a_formatting);
1783
                                    break;
1784
                                }
1785
1786
                                /* 4. Let the common ancestor be the element
1787
                                immediately above the formatting element in the stack
1788
                                of open elements. */
1789
                                $common_ancestor = $this->stack[$fe_s_pos - 1];
1790
1791
                                /* 5. Let a bookmark note the position of the
1792
                                formatting element in the list of active formatting
1793
                                elements relative to the elements on either side
1794
                                of it in the list. */
1795
                                $bookmark = $fe_af_pos;
1796
1797
                                /* 6. Let node and last node be the furthest block.
1798
                                Follow these steps: */
1799
                                $node = $furthest_block;
1800
                                $last_node = $furthest_block;
1801
1802
                                while (true) {
1803
                                    for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
1804
                                        /* 6.1 Let node be the element immediately
1805
                                        prior to node in the stack of open elements. */
1806
                                        $node = $this->stack[$n];
1807
1808
                                        /* 6.2 If node is not in the list of active
1809
                                        formatting elements, then remove node from
1810
                                        the stack of open elements and then go back
1811
                                        to step 1. */
1812
                                        if (!in_array($node, $this->a_formatting, true)) {
1813
                                            array_splice($this->stack, $n, 1);
1814
                                        } else {
1815
                                            break;
1816
                                        }
1817
                                    }
1818
1819
                                    /* 6.3 Otherwise, if node is the formatting
1820
                                    element, then go to the next step in the overall
1821
                                    algorithm. */
1822
                                    if ($node === $formatting_element) {
1823
                                        break;
1824
1825
                                    /* 6.4 Otherwise, if last node is the furthest
1826
                                    block, then move the aforementioned bookmark to
1827
                                    be immediately after the node in the list of
1828
                                    active formatting elements. */
1829
                                    } elseif ($last_node === $furthest_block) {
1830
                                        $bookmark = array_search($node, $this->a_formatting, true) + 1;
1831
                                    }
1832
1833
                                    /* 6.5 Create an element for the token for which
1834
                                     * the element node was created, replace the entry
1835
                                     * for node in the list of active formatting
1836
                                     * elements with an entry for the new element,
1837
                                     * replace the entry for node in the stack of open
1838
                                     * elements with an entry for the new element, and
1839
                                     * let node be the new element. */
1840
                                    // we don't know what the token is anymore
1841
                                    // XDOM
1842
                                    $clone = $node->cloneNode();
1843
                                    $a_pos = array_search($node, $this->a_formatting, true);
1844
                                    $s_pos = array_search($node, $this->stack, true);
1845
                                    $this->a_formatting[$a_pos] = $clone;
1846
                                    $this->stack[$s_pos] = $clone;
1847
                                    $node = $clone;
1848
1849
                                    /* 6.6 Insert last node into node, first removing
1850
                                    it from its previous parent node if any. */
1851
                                    // XDOM
1852
                                    if ($last_node->parentNode !== null) {
1853
                                        $last_node->parentNode->removeChild($last_node);
1854
                                    }
1855
1856
                                    // XDOM
1857
                                    $node->appendChild($last_node);
1858
1859
                                    /* 6.7 Let last node be node. */
1860
                                    $last_node = $node;
1861
1862
                                    /* 6.8 Return to step 1 of this inner set of steps. */
1863
                                }
1864
1865
                                /* 7. If the common ancestor node is a table, tbody,
1866
                                 * tfoot, thead, or tr element, then, foster parent
1867
                                 * whatever last node ended up being in the previous
1868
                                 * step, first removing it from its previous parent
1869
                                 * node if any. */
1870
                                // XDOM
1871
                                if ($last_node->parentNode) { // common step
1872
                                    $last_node->parentNode->removeChild($last_node);
1873
                                }
1874
                                if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
1875
                                    $this->fosterParent($last_node);
1876
                                /* Otherwise, append whatever last node ended up being
1877
                                 * in the previous step to the common ancestor node,
1878
                                 * first removing it from its previous parent node if
1879
                                 * any. */
1880
                                } else {
1881
                                    // XDOM
1882
                                    $common_ancestor->appendChild($last_node);
1883
                                }
1884
1885
                                /* 8. Create an element for the token for which the
1886
                                 * formatting element was created. */
1887
                                // XDOM
1888
                                $clone = $formatting_element->cloneNode();
1889
1890
                                /* 9. Take all of the child nodes of the furthest
1891
                                block and append them to the element created in the
1892
                                last step. */
1893
                                // XDOM
1894
                                while ($furthest_block->hasChildNodes()) {
1895
                                    $child = $furthest_block->firstChild;
1896
                                    $furthest_block->removeChild($child);
1897
                                    $clone->appendChild($child);
1898
                                }
1899
1900
                                /* 10. Append that clone to the furthest block. */
1901
                                // XDOM
1902
                                $furthest_block->appendChild($clone);
1903
1904
                                /* 11. Remove the formatting element from the list
1905
                                of active formatting elements, and insert the new element
1906
                                into the list of active formatting elements at the
1907
                                position of the aforementioned bookmark. */
1908
                                $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
1909
                                array_splice($this->a_formatting, $fe_af_pos, 1);
1910
1911
                                $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
1912
                                $af_part2 = array_slice($this->a_formatting, $bookmark);
1913
                                $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
1914
1915
                                /* 12. Remove the formatting element from the stack
1916
                                of open elements, and insert the new element into the stack
1917
                                of open elements immediately below the position of the
1918
                                furthest block in that stack. */
1919
                                $fe_s_pos = array_search($formatting_element, $this->stack, true);
1920
                                array_splice($this->stack, $fe_s_pos, 1);
1921
1922
                                $fb_s_pos = array_search($furthest_block, $this->stack, true);
1923
                                $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
1924
                                $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
1925
                                $this->stack = array_merge($s_part1, array($clone), $s_part2);
1926
1927
                                /* 13. Jump back to step 1 in this series of steps. */
1928
                                unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
1929
                            }
1930
                        break;
1931
1932
                        case 'applet': case 'button': case 'marquee': case 'object':
1933
                            /* If the stack of open elements has an element in scope whose
1934
                            tag name matches the tag name of the token, then generate implied
1935
                            end tags. */
1936
                            if ($this->elementInScope($token['name'])) {
1937
                                $this->generateImpliedEndTags();
1938
1939
                                /* Now, if the current node is not an element with the same
1940
                                tag name as the token, then this is a parse error. */
1941
                                // XERROR: implement logic
1942
1943
                                /* Pop elements from the stack of open elements until
1944
                                 * an element with the same tag name as the token has
1945
                                 * been popped from the stack. */
1946
                                do {
1947
                                    $node = array_pop($this->stack);
1948
                                } while ($node->tagName !== $token['name']);
1949
1950
                                /* Clear the list of active formatting elements up to the
1951
                                 * last marker. */
1952
                                $keys = array_keys($this->a_formatting, self::MARKER, true);
1953
                                $marker = end($keys);
1954
1955
                                for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
1956
                                    array_pop($this->a_formatting);
1957
                                }
1958
                            }
1959
                            /*else {
1960
                                // parse error
1961
                            }*/
1962
                        break;
1963
1964
                        case 'br':
1965
                            // Parse error
1966
                            $this->emitToken(array(
1967
                                'name' => 'br',
1968
                                'type' => HTML5_Tokenizer::STARTTAG,
1969
                            ));
1970
                        break;
1971
1972
                        /* An end tag token not covered by the previous entries */
1973
                        default:
1974
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
1975
                                /* Initialise node to be the current node (the bottommost
1976
                                node of the stack). */
1977
                                $node = $this->stack[$n];
1978
1979
                                /* If node has the same tag name as the end tag token,
1980
                                then: */
1981
                                if ($token['name'] === $node->tagName) {
1982
                                    /* Generate implied end tags. */
1983
                                    $this->generateImpliedEndTags();
1984
1985
                                    /* If the tag name of the end tag token does not
1986
                                    match the tag name of the current node, this is a
1987
                                    parse error. */
1988
                                    // XERROR: implement this
1989
1990
                                    /* Pop all the nodes from the current node up to
1991
                                    node, including node, then stop these steps. */
1992
                                    // XSKETCHY
1993
                                    do {
1994
                                        $pop = array_pop($this->stack);
1995
                                    } while ($pop !== $node);
1996
                                    break;
1997
                                } else {
1998
                                    $category = $this->getElementCategory($node);
1999
2000
                                    if ($category !== self::FORMATTING && $category !== self::PHRASING) {
2001
                                        /* Otherwise, if node is in neither the formatting
2002
                                        category nor the phrasing category, then this is a
2003
                                        parse error. Stop this algorithm. The end tag token
2004
                                        is ignored. */
2005
                                        $this->ignored = true;
2006
                                        break;
2007
                                        // parse error
2008
                                    }
2009
                                }
2010
                                /* Set node to the previous entry in the stack of open elements. Loop. */
2011
                            }
2012
                        break;
2013
                    }
2014
                    break;
2015
                }
2016
                break;
2017
2018
            case self::IN_CDATA_RCDATA:
2019
                if (
2020
                    $token['type'] === HTML5_Tokenizer::CHARACTER ||
2021
                    $token['type'] === HTML5_Tokenizer::SPACECHARACTER
2022
                ) {
2023
                    $this->insertText($token['data']);
2024
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
2025
                    // parse error
2026
                    /* If the current node is a script element, mark the script
2027
                     * element as "already executed". */
2028
                    // probably not necessary
2029
                    array_pop($this->stack);
2030
                    $this->mode = $this->original_mode;
2031
                    $this->emitToken($token);
2032
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
2033
                    array_pop($this->stack);
2034
                    $this->mode = $this->original_mode;
2035
                    // we're ignoring all of the execution stuff
2036
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
2037
                    array_pop($this->stack);
2038
                    $this->mode = $this->original_mode;
2039
                }
2040
            break;
2041
2042
            case self::IN_TABLE:
2043
                $clear = array('html', 'table');
2044
2045
                /* A character token */
2046
                if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2047
                    $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2048
                    /* Let the pending table character tokens
2049
                     * be an empty list of tokens. */
2050
                    $this->pendingTableCharacters = "";
2051
                    $this->pendingTableCharactersDirty = false;
2052
                    /* Let the original insertion mode be the current
2053
                     * insertion mode. */
2054
                    $this->original_mode = $this->mode;
2055
                    /* Switch the insertion mode to
2056
                     * "in table text" and
2057
                     * reprocess the token. */
2058
                    $this->mode = self::IN_TABLE_TEXT;
2059
                    $this->emitToken($token);
2060
2061
                /* A comment token */
2062
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2063
                    /* Append a Comment node to the current node with the data
2064
                    attribute set to the data given in the comment token. */
2065
                    $this->insertComment($token['data']);
2066
2067
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2068
                    // parse error
2069
2070
                /* A start tag whose tag name is "caption" */
2071
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2072
                $token['name'] === 'caption') {
2073
                    /* Clear the stack back to a table context. */
2074
                    $this->clearStackToTableContext($clear);
2075
2076
                    /* Insert a marker at the end of the list of active
2077
                    formatting elements. */
2078
                    $this->a_formatting[] = self::MARKER;
2079
2080
                    /* Insert an HTML element for the token, then switch the
2081
                    insertion mode to "in caption". */
2082
                    $this->insertElement($token);
2083
                    $this->mode = self::IN_CAPTION;
2084
2085
                /* A start tag whose tag name is "colgroup" */
2086
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2087
                $token['name'] === 'colgroup') {
2088
                    /* Clear the stack back to a table context. */
2089
                    $this->clearStackToTableContext($clear);
2090
2091
                    /* Insert an HTML element for the token, then switch the
2092
                    insertion mode to "in column group". */
2093
                    $this->insertElement($token);
2094
                    $this->mode = self::IN_COLUMN_GROUP;
2095
2096
                /* A start tag whose tag name is "col" */
2097
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2098
                $token['name'] === 'col') {
2099
                    $this->emitToken(array(
2100
                        'name' => 'colgroup',
2101
                        'type' => HTML5_Tokenizer::STARTTAG,
2102
                        'attr' => array()
2103
                    ));
2104
2105
                    $this->emitToken($token);
2106
2107
                /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2108
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2109
                array('tbody', 'tfoot', 'thead'))) {
2110
                    /* Clear the stack back to a table context. */
2111
                    $this->clearStackToTableContext($clear);
2112
2113
                    /* Insert an HTML element for the token, then switch the insertion
2114
                    mode to "in table body". */
2115
                    $this->insertElement($token);
2116
                    $this->mode = self::IN_TABLE_BODY;
2117
2118
                /* A start tag whose tag name is one of: "td", "th", "tr" */
2119
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2120
                in_array($token['name'], array('td', 'th', 'tr'))) {
2121
                    /* Act as if a start tag token with the tag name "tbody" had been
2122
                    seen, then reprocess the current token. */
2123
                    $this->emitToken(array(
2124
                        'name' => 'tbody',
2125
                        'type' => HTML5_Tokenizer::STARTTAG,
2126
                        'attr' => array()
2127
                    ));
2128
2129
                    $this->emitToken($token);
2130
2131
                /* A start tag whose tag name is "table" */
2132
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2133
                $token['name'] === 'table') {
2134
                    /* Parse error. Act as if an end tag token with the tag name "table"
2135
                    had been seen, then, if that token wasn't ignored, reprocess the
2136
                    current token. */
2137
                    $this->emitToken(array(
2138
                        'name' => 'table',
2139
                        'type' => HTML5_Tokenizer::ENDTAG
2140
                    ));
2141
2142
                    if (!$this->ignored) {
2143
                        $this->emitToken($token);
2144
                    }
2145
2146
                /* An end tag whose tag name is "table" */
2147
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2148
                $token['name'] === 'table') {
2149
                    /* If the stack of open elements does not have an element in table
2150
                    scope with the same tag name as the token, this is a parse error.
2151
                    Ignore the token. (fragment case) */
2152
                    if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2153
                        $this->ignored = true;
2154
                    } else {
2155
                        do {
2156
                            $node = array_pop($this->stack);
2157
                        } while ($node->tagName !== 'table');
2158
2159
                        /* Reset the insertion mode appropriately. */
2160
                        $this->resetInsertionMode();
2161
                    }
2162
2163
                /* An end tag whose tag name is one of: "body", "caption", "col",
2164
                "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2165
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2166
                array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2167
                'tfoot', 'th', 'thead', 'tr'))) {
2168
                    // Parse error. Ignore the token.
2169
2170
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2171
                ($token['name'] === 'style' || $token['name'] === 'script')) {
2172
                    $this->processWithRulesFor($token, self::IN_HEAD);
2173
2174
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
2175
                // assignment is intentional
2176
                /* If the token does not have an attribute with the name "type", or
2177
                 * if it does, but that attribute's value is not an ASCII
2178
                 * case-insensitive match for the string "hidden", then: act as
2179
                 * described in the "anything else" entry below. */
2180
                ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
2181
                    // I.e., if it's an input with the type attribute == 'hidden'
2182
                    /* Otherwise */
2183
                    // parse error
2184
                    $this->insertElement($token);
2185
                    array_pop($this->stack);
2186
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
2187
                    /* If the current node is not the root html element, then this is a parse error. */
2188
                    if (end($this->stack)->tagName !== 'html') {
2189
                        // Note: It can only be the current node in the fragment case.
2190
                        // parse error
2191
                    }
2192
                    /* Stop parsing. */
2193
                /* Anything else */
2194
                } else {
2195
                    /* Parse error. Process the token as if the insertion mode was "in
2196
                    body", with the following exception: */
2197
2198
                    $old = $this->foster_parent;
2199
                    $this->foster_parent = true;
2200
                    $this->processWithRulesFor($token, self::IN_BODY);
2201
                    $this->foster_parent = $old;
2202
                }
2203
            break;
2204
2205
            case self::IN_TABLE_TEXT:
2206
                /* A character token */
2207
                if ($token['type'] === HTML5_Tokenizer::CHARACTER) {
2208
                    /* Append the character token to the pending table
2209
                     * character tokens list. */
2210
                    $this->pendingTableCharacters .= $token['data'];
2211
                    $this->pendingTableCharactersDirty = true;
2212
                } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2213
                    $this->pendingTableCharacters .= $token['data'];
2214
                /* Anything else */
2215
                } else {
2216
                    if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
2217
                        /* If any of the tokens in the pending table character tokens list
2218
                         * are character tokens that are not one of U+0009 CHARACTER
2219
                         * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
2220
                         * U+0020 SPACE, then reprocess those character tokens using the
2221
                         * rules given in the "anything else" entry in the "in table"
2222
                         * insertion mode.*/
2223
                        if ($this->pendingTableCharactersDirty) {
2224
                            /* Parse error. Process the token using the rules for the
2225
                             * "in body" insertion mode, except that if the current
2226
                             * node is a table, tbody, tfoot, thead, or tr element,
2227
                             * then, whenever a node would be inserted into the current
2228
                             * node, it must instead be foster parented. */
2229
                            // XERROR
2230
                            $old = $this->foster_parent;
2231
                            $this->foster_parent = true;
2232
                            $text_token = array(
2233
                                'type' => HTML5_Tokenizer::CHARACTER,
2234
                                'data' => $this->pendingTableCharacters,
2235
                            );
2236
                            $this->processWithRulesFor($text_token, self::IN_BODY);
2237
                            $this->foster_parent = $old;
2238
2239
                        /* Otherwise, insert the characters given by the pending table
2240
                         * character tokens list into the current node. */
2241
                        } else {
2242
                            $this->insertText($this->pendingTableCharacters);
2243
                        }
2244
                        $this->pendingTableCharacters = null;
2245
                        $this->pendingTableCharactersNull = null;
2246
                    }
2247
2248
                    /* Switch the insertion mode to the original insertion mode and
2249
                     * reprocess the token.
2250
                     */
2251
                    $this->mode = $this->original_mode;
2252
                    $this->emitToken($token);
2253
                }
2254
            break;
2255
2256
            case self::IN_CAPTION:
2257
                /* An end tag whose tag name is "caption" */
2258
                if ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
2259
                    /* If the stack of open elements does not have an element in table
2260
                    scope with the same tag name as the token, this is a parse error.
2261
                    Ignore the token. (fragment case) */
2262
                    if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2263
                        $this->ignored = true;
2264
                        // Ignore
2265
2266
                    /* Otherwise: */
2267
                    } else {
2268
                        /* Generate implied end tags. */
2269
                        $this->generateImpliedEndTags();
2270
2271
                        /* Now, if the current node is not a caption element, then this
2272
                        is a parse error. */
2273
                        // XERROR: implement
2274
2275
                        /* Pop elements from this stack until a caption element has
2276
                        been popped from the stack. */
2277
                        do {
2278
                            $node = array_pop($this->stack);
2279
                        } while ($node->tagName !== 'caption');
2280
2281
                        /* Clear the list of active formatting elements up to the last
2282
                        marker. */
2283
                        $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2284
2285
                        /* Switch the insertion mode to "in table". */
2286
                        $this->mode = self::IN_TABLE;
2287
                    }
2288
2289
                /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2290
                "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
2291
                name is "table" */
2292
                } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2293
                array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2294
                'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2295
                $token['name'] === 'table')) {
2296
                    /* Parse error. Act as if an end tag with the tag name "caption"
2297
                    had been seen, then, if that token wasn't ignored, reprocess the
2298
                    current token. */
2299
                    $this->emitToken(array(
2300
                        'name' => 'caption',
2301
                        'type' => HTML5_Tokenizer::ENDTAG
2302
                    ));
2303
2304
                    if (!$this->ignored) {
2305
                        $this->emitToken($token);
2306
                    }
2307
2308
                /* An end tag whose tag name is one of: "body", "col", "colgroup",
2309
                "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2310
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2311
                array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
2312
                'thead', 'tr'))) {
2313
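                    // Parse error. Ignore the token.
                    // NOTE(review): the list in the comment above includes "td", but
                    // the in_array() check guarding this branch does not, so a </td>
                    // end tag falls through to the "in body" branch below instead of
                    // being ignored here -- confirm whether 'td' should be added.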
2314
                    $this->ignored = true;
2315
                } else {
2316
                    /* Process the token as if the insertion mode was "in body". */
2317
                    $this->processWithRulesFor($token, self::IN_BODY);
2318
                }
2319
            break;
2320
2321
            case self::IN_COLUMN_GROUP:
2322
                /* A character token that is one of U+0009 CHARACTER TABULATION,
2323
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2324
                or U+0020 SPACE */
2325
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2326
                    /* Append the character to the current node. */
2327
                    $this->insertText($token['data']);
2328
2329
                /* A comment token */
2330
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2331
                    /* Append a Comment node to the current node with the data
2332
                    attribute set to the data given in the comment token. */
2333
                    $this->insertComment($token['data']);
2334
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2335
                    // parse error
2336
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2337
                    $this->processWithRulesFor($token, self::IN_BODY);
2338
2339
                /* A start tag whose tag name is "col" */
2340
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
2341
                    /* Insert a col element for the token. Immediately pop the current
2342
                    node off the stack of open elements. */
2343
                    $this->insertElement($token);
2344
                    array_pop($this->stack);
2345
                    // XERROR: Acknowledge the token's self-closing flag, if it is set.
2346
2347
                /* An end tag whose tag name is "colgroup" */
2348
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2349
                $token['name'] === 'colgroup') {
2350
                    /* If the current node is the root html element, then this is a
2351
                    parse error, ignore the token. (fragment case) */
2352
                    if (end($this->stack)->tagName === 'html') {
2353
                        $this->ignored = true;
2354
2355
                    /* Otherwise, pop the current node (which will be a colgroup
2356
                    element) from the stack of open elements. Switch the insertion
2357
                    mode to "in table". */
2358
                    } else {
2359
                        array_pop($this->stack);
2360
                        $this->mode = self::IN_TABLE;
2361
                    }
2362
2363
                /* An end tag whose tag name is "col" */
2364
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
2365
                    /* Parse error. Ignore the token. */
2366
                    $this->ignored = true;
2367
2368
                /* An end-of-file token */
2369
                /* If the current node is the root html element */
2370
                } elseif ($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
2371
                    /* Stop parsing */
2372
2373
                /* Anything else */
2374
                } else {
2375
                    /* Act as if an end tag with the tag name "colgroup" had been seen,
2376
                    and then, if that token wasn't ignored, reprocess the current token. */
2377
                    $this->emitToken(array(
2378
                        'name' => 'colgroup',
2379
                        'type' => HTML5_Tokenizer::ENDTAG
2380
                    ));
2381
2382
                    if (!$this->ignored) {
2383
                        $this->emitToken($token);
2384
                    }
2385
                }
2386
            break;
2387
2388
            case self::IN_TABLE_BODY:
2389
                $clear = array('tbody', 'tfoot', 'thead', 'html');
2390
2391
                /* A start tag whose tag name is "tr" */
2392
                if ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
2393
                    /* Clear the stack back to a table body context. */
2394
                    $this->clearStackToTableContext($clear);
2395
2396
                    /* Insert a tr element for the token, then switch the insertion
2397
                    mode to "in row". */
2398
                    $this->insertElement($token);
2399
                    $this->mode = self::IN_ROW;
2400
2401
                /* A start tag whose tag name is one of: "th", "td" */
2402
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2403
                ($token['name'] === 'th' ||    $token['name'] === 'td')) {
2404
                    /* Parse error. Act as if a start tag with the tag name "tr" had
2405
                    been seen, then reprocess the current token. */
2406
                    $this->emitToken(array(
2407
                        'name' => 'tr',
2408
                        'type' => HTML5_Tokenizer::STARTTAG,
2409
                        'attr' => array()
2410
                    ));
2411
2412
                    $this->emitToken($token);
2413
2414
                /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2415
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2416
                in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2417
                    /* If the stack of open elements does not have an element in table
2418
                    scope with the same tag name as the token, this is a parse error.
2419
                    Ignore the token. */
2420
                    if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2421
                        // Parse error
2422
                        $this->ignored = true;
2423
2424
                    /* Otherwise: */
2425
                    } else {
2426
                        /* Clear the stack back to a table body context. */
2427
                        $this->clearStackToTableContext($clear);
2428
2429
                        /* Pop the current node from the stack of open elements. Switch
2430
                        the insertion mode to "in table". */
2431
                        array_pop($this->stack);
2432
                        $this->mode = self::IN_TABLE;
2433
                    }
2434
2435
                /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2436
                "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
2437
                } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2438
                array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
2439
                ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2440
                    /* If the stack of open elements does not have a tbody, thead, or
2441
                    tfoot element in table scope, this is a parse error. Ignore the
2442
                    token. (fragment case) */
2443
                    if (!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) {
2444
                        // parse error
2445
                        $this->ignored = true;
2446
2447
                    /* Otherwise: */
2448
                    } else {
2449
                        /* Clear the stack back to a table body context. */
2450
                        $this->clearStackToTableContext($clear);
2451
2452
                        /* Act as if an end tag with the same tag name as the current
2453
                        node ("tbody", "tfoot", or "thead") had been seen, then
2454
                        reprocess the current token. */
2455
                        $this->emitToken(array(
2456
                            'name' => end($this->stack)->tagName,
2457
                            'type' => HTML5_Tokenizer::ENDTAG
2458
                        ));
2459
2460
                        $this->emitToken($token);
2461
                    }
2462
2463
                /* An end tag whose tag name is one of: "body", "caption", "col",
2464
                "colgroup", "html", "td", "th", "tr" */
2465
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2466
                array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
2467
                    /* Parse error. Ignore the token. */
2468
                    $this->ignored = true;
2469
2470
                /* Anything else */
2471
                } else {
2472
                    /* Process the token as if the insertion mode was "in table". */
2473
                    $this->processWithRulesFor($token, self::IN_TABLE);
2474
                }
2475
            break;
2476
2477
            case self::IN_ROW:
2478
                $clear = array('tr', 'html');
2479
2480
                /* A start tag whose tag name is one of: "th", "td" */
2481
                if ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2482
                ($token['name'] === 'th' || $token['name'] === 'td')) {
2483
                    /* Clear the stack back to a table row context. */
2484
                    $this->clearStackToTableContext($clear);
2485
2486
                    /* Insert an HTML element for the token, then switch the insertion
2487
                    mode to "in cell". */
2488
                    $this->insertElement($token);
2489
                    $this->mode = self::IN_CELL;
2490
2491
                    /* Insert a marker at the end of the list of active formatting
2492
                    elements. */
2493
                    $this->a_formatting[] = self::MARKER;
2494
2495
                /* An end tag whose tag name is "tr" */
2496
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
2497
                    /* If the stack of open elements does not have an element in table
2498
                    scope with the same tag name as the token, this is a parse error.
2499
                    Ignore the token. (fragment case) */
2500
                    if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2501
                        // Ignore.
2502
                        $this->ignored = true;
2503
                    } else {
2504
                        /* Clear the stack back to a table row context. */
2505
                        $this->clearStackToTableContext($clear);
2506
2507
                        /* Pop the current node (which will be a tr element) from the
2508
                        stack of open elements. Switch the insertion mode to "in table
2509
                        body". */
2510
                        array_pop($this->stack);
2511
                        $this->mode = self::IN_TABLE_BODY;
2512
                    }
2513
2514
                /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2515
                "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
2516
                } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2517
                array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
2518
                ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2519
                    /* Act as if an end tag with the tag name "tr" had been seen, then,
2520
                    if that token wasn't ignored, reprocess the current token. */
2521
                    $this->emitToken(array(
2522
                        'name' => 'tr',
2523
                        'type' => HTML5_Tokenizer::ENDTAG
2524
                    ));
2525
                    if (!$this->ignored) {
2526
                        $this->emitToken($token);
2527
                    }
2528
2529
                /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2530
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2531
                in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2532
                    /* If the stack of open elements does not have an element in table
2533
                    scope with the same tag name as the token, this is a parse error.
2534
                    Ignore the token. */
2535
                    if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2536
                        $this->ignored = true;
2537
2538
                    /* Otherwise: */
2539
                    } else {
2540
                        /* Otherwise, act as if an end tag with the tag name "tr" had
2541
                        been seen, then reprocess the current token. */
2542
                        $this->emitToken(array(
2543
                            'name' => 'tr',
2544
                            'type' => HTML5_Tokenizer::ENDTAG
2545
                        ));
2546
2547
                        $this->emitToken($token);
2548
                    }
2549
2550
                /* An end tag whose tag name is one of: "body", "caption", "col",
2551
                "colgroup", "html", "td", "th" */
2552
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2553
                array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
2554
                    /* Parse error. Ignore the token. */
2555
                    $this->ignored = true;
2556
2557
                /* Anything else */
2558
                } else {
2559
                    /* Process the token as if the insertion mode was "in table". */
2560
                    $this->processWithRulesFor($token, self::IN_TABLE);
2561
                }
2562
            break;
2563
2564
            case self::IN_CELL:
2565
                /* An end tag whose tag name is one of: "td", "th" */
2566
                if ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2567
                ($token['name'] === 'td' || $token['name'] === 'th')) {
2568
                    /* If the stack of open elements does not have an element in table
2569
                    scope with the same tag name as that of the token, then this is a
2570
                    parse error and the token must be ignored. */
2571
                    if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2572
                        $this->ignored = true;
2573
2574
                    /* Otherwise: */
2575
                    } else {
2576
                        /* Generate implied end tags, except for elements with the same
2577
                        tag name as the token. */
2578
                        $this->generateImpliedEndTags(array($token['name']));
2579
2580
                        /* Now, if the current node is not an element with the same tag
2581
                        name as the token, then this is a parse error. */
2582
                        // XERROR: Implement parse error code
2583
2584
                        /* Pop elements from this stack until an element with the same
2585
                        tag name as the token has been popped from the stack. */
2586
                        do {
2587
                            $node = array_pop($this->stack);
2588
                        } while ($node->tagName !== $token['name']);
2589
2590
                        /* Clear the list of active formatting elements up to the last
2591
                        marker. */
2592
                        $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2593
2594
                        /* Switch the insertion mode to "in row". (The current node
2595
                        will be a tr element at this point.) */
2596
                        $this->mode = self::IN_ROW;
2597
                    }
2598
2599
                /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2600
                "tbody", "td", "tfoot", "th", "thead", "tr" */
2601
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2602
                array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2603
                'thead', 'tr'))) {
2604
                    /* If the stack of open elements does not have a td or th element
2605
                    in table scope, then this is a parse error; ignore the token.
2606
                    (fragment case) */
2607
                    if (!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
2608
                        // parse error
2609
                        $this->ignored = true;
2610
2611
                    /* Otherwise, close the cell (see below) and reprocess the current
2612
                    token. */
2613
                    } else {
2614
                        $this->closeCell();
2615
                        $this->emitToken($token);
2616
                    }
2617
2618
                /* An end tag whose tag name is one of: "body", "caption", "col",
2619
                "colgroup", "html" */
2620
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2621
                array('body', 'caption', 'col', 'colgroup', 'html'))) {
2622
                    /* Parse error. Ignore the token. */
2623
                    $this->ignored = true;
2624
2625
                /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
2626
                "thead", "tr" */
2627
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2628
                array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2629
                    /* If the stack of open elements does not have a td or th element
2630
                    in table scope, then this is a parse error; ignore the token.
2631
                    (innerHTML case) */
2632
                    if (!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
2633
                        // Parse error
2634
                        $this->ignored = true;
2635
2636
                    /* Otherwise, close the cell (see below) and reprocess the current
2637
                    token. */
2638
                    } else {
2639
                        $this->closeCell();
2640
                        $this->emitToken($token);
2641
                    }
2642
2643
                /* Anything else */
2644
                } else {
2645
                    /* Process the token as if the insertion mode was "in body". */
2646
                    $this->processWithRulesFor($token, self::IN_BODY);
2647
                }
2648
            break;
2649
2650
            case self::IN_SELECT:
2651
                /* Handle the token as follows: */
2652
2653
                /* A character token */
2654
                if (
2655
                    $token['type'] === HTML5_Tokenizer::CHARACTER ||
2656
                    $token['type'] === HTML5_Tokenizer::SPACECHARACTER
2657
                ) {
2658
                    /* Append the token's character to the current node. */
2659
                    $this->insertText($token['data']);
2660
2661
                /* A comment token */
2662
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2663
                    /* Append a Comment node to the current node with the data
2664
                    attribute set to the data given in the comment token. */
2665
                    $this->insertComment($token['data']);
2666
2667
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2668
                    // parse error
2669
2670
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2671
                    $this->processWithRulesFor($token, self::IN_BODY);
2672
2673
                /* A start tag token whose tag name is "option" */
2674
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2675
                $token['name'] === 'option') {
2676
                    /* If the current node is an option element, act as if an end tag
2677
                    with the tag name "option" had been seen. */
2678
                    if (end($this->stack)->tagName === 'option') {
2679
                        $this->emitToken(array(
2680
                            'name' => 'option',
2681
                            'type' => HTML5_Tokenizer::ENDTAG
2682
                        ));
2683
                    }
2684
2685
                    /* Insert an HTML element for the token. */
2686
                    $this->insertElement($token);
2687
2688
                /* A start tag token whose tag name is "optgroup" */
2689
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2690
                $token['name'] === 'optgroup') {
2691
                    /* If the current node is an option element, act as if an end tag
2692
                    with the tag name "option" had been seen. */
2693
                    if (end($this->stack)->tagName === 'option') {
2694
                        $this->emitToken(array(
2695
                            'name' => 'option',
2696
                            'type' => HTML5_Tokenizer::ENDTAG
2697
                        ));
2698
                    }
2699
2700
                    /* If the current node is an optgroup element, act as if an end tag
2701
                    with the tag name "optgroup" had been seen. */
2702
                    if (end($this->stack)->tagName === 'optgroup') {
2703
                        $this->emitToken(array(
2704
                            'name' => 'optgroup',
2705
                            'type' => HTML5_Tokenizer::ENDTAG
2706
                        ));
2707
                    }
2708
2709
                    /* Insert an HTML element for the token. */
2710
                    $this->insertElement($token);
2711
2712
                /* An end tag token whose tag name is "optgroup" */
2713
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2714
                $token['name'] === 'optgroup') {
2715
                    /* First, if the current node is an option element, and the node
2716
                    immediately before it in the stack of open elements is an optgroup
2717
                    element, then act as if an end tag with the tag name "option" had
2718
                    been seen. */
2719
                    $elements_in_stack = count($this->stack);
2720
2721
                    if ($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
2722
                    $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
2723
                        $this->emitToken(array(
2724
                            'name' => 'option',
2725
                            'type' => HTML5_Tokenizer::ENDTAG
2726
                        ));
2727
                    }
2728
2729
                    /* If the current node is an optgroup element, then pop that node
2730
                    from the stack of open elements. Otherwise, this is a parse error,
2731
                    ignore the token. */
2732
                    if (end($this->stack)->tagName === 'optgroup') {
2733
                        array_pop($this->stack);
2734
                    } else {
2735
                        // parse error
2736
                        $this->ignored = true;
2737
                    }
2738
2739
                /* An end tag token whose tag name is "option" */
2740
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2741
                $token['name'] === 'option') {
2742
                    /* If the current node is an option element, then pop that node
2743
                    from the stack of open elements. Otherwise, this is a parse error,
2744
                    ignore the token. */
2745
                    if (end($this->stack)->tagName === 'option') {
2746
                        array_pop($this->stack);
2747
                    } else {
2748
                        // parse error
2749
                        $this->ignored = true;
2750
                    }
2751
2752
                /* An end tag whose tag name is "select" */
2753
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2754
                $token['name'] === 'select') {
2755
                    /* If the stack of open elements does not have an element in table
2756
                    scope with the same tag name as the token, this is a parse error.
2757
                    Ignore the token. (fragment case) */
2758
                    if (!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2759
                        $this->ignored = true;
2760
                        // parse error
2761
2762
                    /* Otherwise: */
2763
                    } else {
2764
                        /* Pop elements from the stack of open elements until a select
2765
                        element has been popped from the stack. */
2766
                        do {
2767
                            $node = array_pop($this->stack);
2768
                        } while ($node->tagName !== 'select');
2769
2770
                        /* Reset the insertion mode appropriately. */
2771
                        $this->resetInsertionMode();
2772
                    }
2773
2774
                /* A start tag whose tag name is "select" */
2775
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
2776
                    /* Parse error. Act as if the token had been an end tag with the
2777
                    tag name "select" instead. */
2778
                    $this->emitToken(array(
2779
                        'name' => 'select',
2780
                        'type' => HTML5_Tokenizer::ENDTAG
2781
                    ));
2782
2783
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2784
                ($token['name'] === 'input' || $token['name'] === 'keygen' ||  $token['name'] === 'textarea')) {
2785
                    // parse error
2786
                    $this->emitToken(array(
2787
                        'name' => 'select',
2788
                        'type' => HTML5_Tokenizer::ENDTAG
2789
                    ));
2790
                    $this->emitToken($token);
2791
2792
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
2793
                    $this->processWithRulesFor($token, self::IN_HEAD);
2794
2795
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
2796
                    // XERROR: If the current node is not the root html element, then this is a parse error.
2797
                    /* Stop parsing */
2798
2799
                /* Anything else */
2800
                } else {
2801
                    /* Parse error. Ignore the token. */
2802
                    $this->ignored = true;
2803
                }
2804
            break;
2805
2806
            case self::IN_SELECT_IN_TABLE:
2807
2808
                if ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2809
                in_array($token['name'], array('caption', 'table', 'tbody',
2810
                'tfoot', 'thead', 'tr', 'td', 'th'))) {
2811
                    // parse error
2812
                    $this->emitToken(array(
2813
                        'name' => 'select',
2814
                        'type' => HTML5_Tokenizer::ENDTAG,
2815
                    ));
2816
                    $this->emitToken($token);
2817
2818
                /* An end tag whose tag name is one of: "caption", "table", "tbody",
2819
                "tfoot", "thead", "tr", "td", "th" */
2820
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2821
                in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th')))  {
2822
                    /* Parse error. */
2823
                    // parse error
2824
2825
                    /* If the stack of open elements has an element in table scope with
2826
                    the same tag name as that of the token, then act as if an end tag
2827
                    with the tag name "select" had been seen, and reprocess the token.
2828
                    Otherwise, ignore the token. */
2829
                    if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2830
                        $this->emitToken(array(
2831
                            'name' => 'select',
2832
                            'type' => HTML5_Tokenizer::ENDTAG
2833
                        ));
2834
2835
                        $this->emitToken($token);
2836
                    } else {
2837
                        $this->ignored = true;
2838
                    }
2839
                } else {
2840
                    $this->processWithRulesFor($token, self::IN_SELECT);
2841
                }
2842
            break;
2843
2844
            case self::IN_FOREIGN_CONTENT:
2845
                if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2846
                $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2847
                    $this->insertText($token['data']);
2848
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2849
                    $this->insertComment($token['data']);
2850
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2851
                    // XERROR: parse error
2852
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2853
                $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
2854
                // XDOM
2855
                end($this->stack)->namespaceURI === self::NS_SVG) {
2856
                    array_pop($this->stack);
2857
                    // a bunch of script running mumbo jumbo
2858
                } elseif (
2859
                    ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2860
                        ((
2861
                            $token['name'] !== 'mglyph' &&
2862
                            $token['name'] !== 'malignmark' &&
2863
                            // XDOM
2864
                            end($this->stack)->namespaceURI === self::NS_MATHML &&
2865
                            in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
2866
                        ) ||
2867
                        (
2868
                            $token['name'] === 'svg' &&
2869
                            // XDOM
2870
                            end($this->stack)->namespaceURI === self::NS_MATHML &&
2871
                            end($this->stack)->tagName === 'annotation-xml'
2872
                        ) ||
2873
                        (
2874
                            // XDOM
2875
                            end($this->stack)->namespaceURI === self::NS_SVG &&
2876
                            in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
2877
                        ) ||
2878
                        (
2879
                            // XSKETCHY && XDOM
2880
                            end($this->stack)->namespaceURI === self::NS_HTML
2881
                        ))
2882
                    ) || $token['type'] === HTML5_Tokenizer::ENDTAG
2883
                ) {
2884
                    $this->processWithRulesFor($token, $this->secondary_mode);
2885
                    /* If, after doing so, the insertion mode is still "in foreign
2886
                     * content", but there is no element in scope that has a namespace
2887
                     * other than the HTML namespace, switch the insertion mode to the
2888
                     * secondary insertion mode. */
2889
                    if ($this->mode === self::IN_FOREIGN_CONTENT) {
2890
                        $found = false;
2891
                        // this basically duplicates elementInScope()
2892
                        for ($i = count($this->stack) - 1; $i >= 0; $i--) {
2893
                            // XDOM
2894
                            $node = $this->stack[$i];
2895
                            if ($node->namespaceURI !== self::NS_HTML) {
2896
                                $found = true;
2897
                                break;
2898
                            } elseif (in_array($node->tagName, array('table', 'html',
2899
                            'applet', 'caption', 'td', 'th', 'button', 'marquee',
2900
                            'object')) || ($node->tagName === 'foreignObject' &&
2901
                            $node->namespaceURI === self::NS_SVG)) {
2902
                                break;
2903
                            }
2904
                        }
2905
                        if (!$found) {
2906
                            $this->mode = $this->secondary_mode;
2907
                        }
2908
                    }
2909
                } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
2910
                $token['type'] === HTML5_Tokenizer::STARTTAG &&
2911
                (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
2912
                "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",
2913
                "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
2914
                "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s",  "small",
2915
                "span", "strong", "strike",  "sub", "sup", "table", "tt", "u", "ul",
2916
                "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
2917
                $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
2918
                    // XERROR: parse error
2919
                    do {
2920
                        $node = array_pop($this->stack);
2921
                        // XDOM
2922
                    } while ($node->namespaceURI !== self::NS_HTML);
2923
                    $this->stack[] = $node;
2924
                    $this->mode = $this->secondary_mode;
2925
                    $this->emitToken($token);
2926
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
2927
                    static $svg_lookup = array(
2928
                        'altglyph' => 'altGlyph',
2929
                        'altglyphdef' => 'altGlyphDef',
2930
                        'altglyphitem' => 'altGlyphItem',
2931
                        'animatecolor' => 'animateColor',
2932
                        'animatemotion' => 'animateMotion',
2933
                        'animatetransform' => 'animateTransform',
2934
                        'clippath' => 'clipPath',
2935
                        'feblend' => 'feBlend',
2936
                        'fecolormatrix' => 'feColorMatrix',
2937
                        'fecomponenttransfer' => 'feComponentTransfer',
2938
                        'fecomposite' => 'feComposite',
2939
                        'feconvolvematrix' => 'feConvolveMatrix',
2940
                        'fediffuselighting' => 'feDiffuseLighting',
2941
                        'fedisplacementmap' => 'feDisplacementMap',
2942
                        'fedistantlight' => 'feDistantLight',
2943
                        'feflood' => 'feFlood',
2944
                        'fefunca' => 'feFuncA',
2945
                        'fefuncb' => 'feFuncB',
2946
                        'fefuncg' => 'feFuncG',
2947
                        'fefuncr' => 'feFuncR',
2948
                        'fegaussianblur' => 'feGaussianBlur',
2949
                        'feimage' => 'feImage',
2950
                        'femerge' => 'feMerge',
2951
                        'femergenode' => 'feMergeNode',
2952
                        'femorphology' => 'feMorphology',
2953
                        'feoffset' => 'feOffset',
2954
                        'fepointlight' => 'fePointLight',
2955
                        'fespecularlighting' => 'feSpecularLighting',
2956
                        'fespotlight' => 'feSpotLight',
2957
                        'fetile' => 'feTile',
2958
                        'feturbulence' => 'feTurbulence',
2959
                        'foreignobject' => 'foreignObject',
2960
                        'glyphref' => 'glyphRef',
2961
                        'lineargradient' => 'linearGradient',
2962
                        'radialgradient' => 'radialGradient',
2963
                        'textpath' => 'textPath',
2964
                    );
2965
                    // XDOM
2966
                    $current = end($this->stack);
2967
                    if ($current->namespaceURI === self::NS_MATHML) {
2968
                        $token = $this->adjustMathMLAttributes($token);
2969
                    }
2970
                    if ($current->namespaceURI === self::NS_SVG &&
2971
                    isset($svg_lookup[$token['name']])) {
2972
                        $token['name'] = $svg_lookup[$token['name']];
2973
                    }
2974
                    if ($current->namespaceURI === self::NS_SVG) {
2975
                        $token = $this->adjustSVGAttributes($token);
2976
                    }
2977
                    $token = $this->adjustForeignAttributes($token);
2978
                    $this->insertForeignElement($token, $current->namespaceURI);
2979
                    if (isset($token['self-closing'])) {
2980
                        array_pop($this->stack);
2981
                        // XERROR: acknowledge self-closing flag
2982
                    }
2983
                }
2984
            break;
2985
2986
            case self::AFTER_BODY:
2987
                /* Handle the token as follows: */
2988
2989
                /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2990
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2991
                or U+0020 SPACE */
2992
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2993
                    /* Process the token as it would be processed if the insertion mode
2994
                    was "in body". */
2995
                    $this->processWithRulesFor($token, self::IN_BODY);
2996
2997
                /* A comment token */
2998
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2999
                    /* Append a Comment node to the first element in the stack of open
3000
                    elements (the html element), with the data attribute set to the
3001
                    data given in the comment token. */
3002
                    // XDOM
3003
                    $comment = $this->dom->createComment($token['data']);
3004
                    $this->stack[0]->appendChild($comment);
3005
3006
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
3007
                    // parse error
3008
3009
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
3010
                    $this->processWithRulesFor($token, self::IN_BODY);
3011
3012
                /* An end tag with the tag name "html" */
3013
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
3014
                    /*     If the parser was originally created as part of the HTML
3015
                     *     fragment parsing algorithm, this is a parse error; ignore
3016
                     *     the token. (fragment case) */
3017
                    $this->ignored = true;
3018
                    // XERROR: implement this
3019
3020
                    $this->mode = self::AFTER_AFTER_BODY;
3021
3022
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
3023
                    /* Stop parsing */
3024
3025
                /* Anything else */
3026
                } else {
3027
                    /* Parse error. Set the insertion mode to "in body" and reprocess
3028
                    the token. */
3029
                    $this->mode = self::IN_BODY;
3030
                    $this->emitToken($token);
3031
                }
3032
            break;
3033
3034
            case self::IN_FRAMESET:
3035
                /* Handle the token as follows: */
3036
3037
                /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3038
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3039
                U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3040
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3041
                    /* Append the character to the current node. */
3042
                    $this->insertText($token['data']);
3043
3044
                /* A comment token */
3045
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
3046
                    /* Append a Comment node to the current node with the data
3047
                    attribute set to the data given in the comment token. */
3048
                    $this->insertComment($token['data']);
3049
3050
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
3051
                    // parse error
3052
3053
                /* A start tag with the tag name "frameset" */
3054
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
3055
                $token['name'] === 'frameset') {
3056
                    $this->insertElement($token);
3057
3058
                /* An end tag with the tag name "frameset" */
3059
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
3060
                $token['name'] === 'frameset') {
3061
                    /* If the current node is the root html element, then this is a
3062
                    parse error; ignore the token. (fragment case) */
3063
                    if (end($this->stack)->tagName === 'html') {
3064
                        $this->ignored = true;
3065
                        // Parse error
3066
3067
                    } else {
3068
                        /* Otherwise, pop the current node from the stack of open
3069
                        elements. */
3070
                        array_pop($this->stack);
3071
3072
                        /* If the parser was not originally created as part of the HTML
3073
                         * fragment parsing algorithm  (fragment case), and the current
3074
                         * node is no longer a frameset element, then switch the
3075
                         * insertion mode to "after frameset". */
3076
                        $this->mode = self::AFTER_FRAMESET;
3077
                    }
3078
3079
                /* A start tag with the tag name "frame" */
3080
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
3081
                $token['name'] === 'frame') {
3082
                    /* Insert an HTML element for the token. */
3083
                    $this->insertElement($token);
3084
3085
                    /* Immediately pop the current node off the stack of open elements. */
3086
                    array_pop($this->stack);
3087
3088
                    // XERROR: Acknowledge the token's self-closing flag, if it is set.
3089
3090
                /* A start tag with the tag name "noframes" */
3091
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
3092
                $token['name'] === 'noframes') {
3093
                    /* Process the token using the rules for the "in head" insertion mode. */
3094
                    $this->processwithRulesFor($token, self::IN_HEAD);
3095
3096
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
3097
                    // XERROR: If the current node is not the root html element, then this is a parse error.
3098
                    /* Stop parsing */
3099
                /* Anything else */
3100
                } else {
3101
                    /* Parse error. Ignore the token. */
3102
                    $this->ignored = true;
3103
                }
3104
            break;
3105
3106
            case self::AFTER_FRAMESET:
3107
                /* Handle the token as follows: */
3108
3109
                /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3110
                U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3111
                U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3112
                if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3113
                    /* Append the character to the current node. */
3114
                    $this->insertText($token['data']);
3115
3116
                /* A comment token */
3117
                } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
3118
                    /* Append a Comment node to the current node with the data
3119
                    attribute set to the data given in the comment token. */
3120
                    $this->insertComment($token['data']);
3121
3122
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
3123
                    // parse error
3124
3125
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
3126
                    $this->processWithRulesFor($token, self::IN_BODY);
3127
3128
                /* An end tag with the tag name "html" */
3129
                } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
3130
                $token['name'] === 'html') {
3131
                    $this->mode = self::AFTER_AFTER_FRAMESET;
3132
3133
                /* A start tag with the tag name "noframes" */
3134
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG &&
3135
                $token['name'] === 'noframes') {
3136
                    $this->processWithRulesFor($token, self::IN_HEAD);
3137
3138
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
3139
                    /* Stop parsing */
3140
3141
                /* Anything else */
3142
                } else {
3143
                    /* Parse error. Ignore the token. */
3144
                    $this->ignored = true;
3145
                }
3146
            break;
3147
3148
            case self::AFTER_AFTER_BODY:
3149
                /* A comment token */
3150
                if ($token['type'] === HTML5_Tokenizer::COMMENT) {
3151
                    /* Append a Comment node to the Document object with the data
3152
                    attribute set to the data given in the comment token. */
3153
                    // XDOM
3154
                    $comment = $this->dom->createComment($token['data']);
3155
                    $this->dom->appendChild($comment);
3156
3157
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3158
                $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3159
                ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3160
                    $this->processWithRulesFor($token, self::IN_BODY);
3161
3162
                /* An end-of-file token */
3163
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
3164
                    /* OMG DONE!! */
3165
                } else {
3166
                    // parse error
3167
                    $this->mode = self::IN_BODY;
3168
                    $this->emitToken($token);
3169
                }
3170
            break;
3171
3172
            case self::AFTER_AFTER_FRAMESET:
3173
                /* A comment token */
3174
                if ($token['type'] === HTML5_Tokenizer::COMMENT) {
3175
                    /* Append a Comment node to the Document object with the data
3176
                    attribute set to the data given in the comment token. */
3177
                    // XDOM
3178
                    $comment = $this->dom->createComment($token['data']);
3179
                    $this->dom->appendChild($comment);
3180
                } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3181
                $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3182
                ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3183
                    $this->processWithRulesFor($token, self::IN_BODY);
3184
3185
                /* An end-of-file token */
3186
                } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
3187
                    /* OMG DONE!! */
3188
                } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
3189
                    $this->processWithRulesFor($token, self::IN_HEAD);
3190
                } else {
3191
                    // parse error
3192
                }
3193
            break;
3194
        }
3195
    }
3196
3197
    /**
     * Creates an element in the HTML namespace for a tag token and copies
     * the token's attributes onto it.
     *
     * An attribute is copied only when the element does not already have an
     * attribute of that name and the name starts with an ASCII letter, an
     * underscore or a colon; every other attribute is silently skipped.
     *
     * @param array $token  Tag token; reads 'name' and, when non-empty,
     *                      'attr' (entries with 'name' and 'value' keys).
     * @param bool  $append When truthy, the element is inserted through
     *                      appendToRealParent() and pushed onto the stack of
     *                      open elements; otherwise it is only created.
     * @return object The element returned by $this->dom->createElementNS()
     *                (presumably a DOMElement; $this->dom is declared
     *                outside this section).
     */
    private function insertElement($token, $append = true) {
        $element = $this->dom->createElementNS(self::NS_HTML, $token['name']);

        $attributes = empty($token['attr']) ? array() : $token['attr'];
        foreach ($attributes as $attribute) {
            $attr_name = $attribute['name'];

            // First occurrence of an attribute wins; later duplicates are dropped.
            if ($element->hasAttribute($attr_name)) {
                continue;
            }
            // Only the first character of the name is validated here.
            if (!preg_match("/^[a-zA-Z_:]/", $attr_name)) {
                continue;
            }

            $element->setAttribute($attr_name, $attribute['value']);
        }

        if (!$append) {
            return $element;
        }

        $this->appendToRealParent($element);
        $this->stack[] = $element;

        return $element;
    }
3214
3215
    /**
     * Inserts character data as a text node through appendToRealParent().
     *
     * Nothing is inserted for an empty string. While $this->ignore_lf_token
     * is truthy, one leading line feed is stripped first; if nothing remains
     * after that, no text node is created.
     *
     * @param string $data Character data to insert.
     */
    private function insertText($data) {
        if ($data === '') {
            return;
        }

        if ($this->ignore_lf_token && $data[0] === "\n") {
            $data = substr($data, 1);

            // substr() returns '' (not false) for a start offset equal to the
            // string length since PHP 7.0, so both results mean "nothing
            // left". Checking only for false would insert an empty text node.
            if ($data === '' || $data === false) {
                return;
            }
        }

        $text = $this->dom->createTextNode($data);
        $this->appendToRealParent($text);
    }
3233
3234
    /**
     * Creates a comment node holding the given data and inserts it through
     * appendToRealParent().
     *
     * @param string $data Text of the comment.
     */
    private function insertComment($data) {
        $this->appendToRealParent($this->dom->createComment($data));
    }
3241
3242
    /**
     * Inserts a node at the current insertion point.
     *
     * Normally the node becomes the last child of the current node (the last
     * entry of the stack of open elements). When $this->foster_parent is
     * truthy and the current node is a table, tbody, tfoot, thead or tr
     * element, the insertion is handed to fosterParent() instead.
     *
     * @param object $node Node to insert.
     */
    private function appendToRealParent($node) {
        $current = end($this->stack);
        $table_structure = array('table', 'tbody', 'tfoot', 'thead', 'tr');

        // The tag name is inspected only while foster parenting is switched on.
        if ($this->foster_parent && in_array($current->tagName, $table_structure)) {
            $this->fosterParent($node);
            return;
        }

        $current->appendChild($node);
    }
3262
3263
    /**
     * Tells whether the stack of open elements has an element with the given
     * tag name in the requested scope.
     *
     * The stack is walked from the current node towards the root. The walk
     * ends with a match as soon as a node with the target tag name is found,
     * and ends without one as soon as a scope boundary is met:
     *  - every scope: table, html;
     *  - every scope except self::SCOPE_TABLE: applet, caption, td, th,
     *    button, marquee, object, and an SVG foreignObject;
     *  - self::SCOPE_LISTITEM only: ol, ul.
     *
     * @param string|array $el    Tag name, or a list of tag names of which
     *                            any one may match.
     * @param int          $scope One of the self::SCOPE* constants.
     * @return bool|null True on a match, false when a boundary is hit first
     *                   (or no name of a list matches), null when the whole
     *                   stack is walked without either happening.
     */
    private function elementInScope($el, $scope = self::SCOPE) {
        // A list of names is in scope when any single name is.
        if (is_array($el)) {
            foreach ($el as $candidate) {
                if ($this->elementInScope($candidate, $scope)) {
                    return true;
                }
            }

            return false;
        }

        $general_boundaries = array('applet', 'caption', 'td', 'th', 'button', 'marquee', 'object');

        // Start at the current node (end of the stack) and move towards the root.
        for ($index = count($this->stack) - 1; $index >= 0; $index--) {
            $node = $this->stack[$index];
            $tag = $node->tagName;

            // The target itself is checked before any boundary, so a
            // boundary element can still be found as a target.
            if ($tag === $el) {
                return true;
            }

            // Boundaries shared by all scopes.
            if ($tag === 'table' || $tag === 'html') {
                return false;
            }

            // Boundaries for "in scope" and "in list item scope".
            if ($scope !== self::SCOPE_TABLE) {
                if (in_array($tag, $general_boundaries)) {
                    return false;
                }
                if ($tag === 'foreignObject' && $node->namespaceURI === self::NS_SVG) {
                    return false;
                }
            }

            // Extra boundaries for "in list item scope".
            if ($scope === self::SCOPE_LISTITEM && in_array($tag, array('ol', 'ul'))) {
                return false;
            }
        }

        // Reached only when the stack holds neither the target nor a boundary.
        return null;
    }
3320
3321
    /**
     * Reconstructs the active formatting elements.
     *
     * Starting from the end of the list of active formatting elements, the
     * list is rewound to the entry just after the closest marker or element
     * that is still on the stack of open elements (or to the first entry if
     * there is none). Every entry from there to the end of the list is then
     * shallow-cloned, inserted through appendToRealParent(), pushed onto the
     * stack of open elements, and replaced in the list by its clone.
     *
     * @return bool False when there was nothing to reconstruct, true otherwise.
     */
    private function reconstructActiveFormattingElements() {
        /* 1. If there are no entries in the list of active formatting elements,
        then there is nothing to reconstruct; stop this algorithm. */
        $formatting_elements = count($this->a_formatting);

        if ($formatting_elements === 0) {
            return false;
        }

        /* 3. Let entry be the last (most recently added) element in the list
        of active formatting elements. */
        $entry = end($this->a_formatting);

        /* 2. If the last (most recently added) entry in the list of active
        formatting elements is a marker, or if it is an element that is in the
        stack of open elements, then there is nothing to reconstruct; stop this
        algorithm. */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            return false;
        }

        // Whether the create loop must first move one entry forward (step 7).
        // It stays false only when the rewind reaches the first entry of the
        // list without meeting a marker or an open element (step 4).
        $advance = false;
        $a = $formatting_elements - 1;

        /* 4. If there are no entries before entry in the list of active
        formatting elements, then jump to step 8. */
        while ($a > 0) {
            /* 5. Let entry be the entry one earlier than entry in the list of
            active formatting elements. */
            $a--;
            $entry = $this->a_formatting[$a];

            /* 6. If entry is neither a marker nor an element that is also in
            the stack of open elements, go to step 4. */
            if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
                // This entry must not be cloned itself: reconstruction starts
                // with the entry after it, so step 7 has to run first.
                $advance = true;
                break;
            }
        }

        while (true) {
            /* 7. Let entry be the element one later than entry in the list of
            active formatting elements. */
            if ($advance) {
                $a++;
                $entry = $this->a_formatting[$a];
            }

            /* 8. Perform a shallow clone of the element entry to obtain clone. */
            $clone = $entry->cloneNode();

            /* 9. Append clone to the current node and push it onto the stack
            of open elements so that it is the new current node. */
            $this->appendToRealParent($clone);
            $this->stack[] = $clone;

            /* 10. Replace the entry for entry in the list with an entry for
            clone. */
            $this->a_formatting[$a] = $clone;

            /* 11. If the entry for clone in the list of active formatting
            elements is not the last entry in the list, return to step 7. */
            if (end($this->a_formatting) === $clone) {
                break;
            }
            $advance = true;
        }

        // The return value is informational; true means elements were reconstructed.
        return true;
    }
3397
3398
    /**
     * Clears the list of active formatting elements up to the last marker.
     *
     * Entries are removed from the end of the list until a marker has been
     * removed. If the list contains no marker it is emptied completely and
     * the method returns, rather than looping forever on an empty list.
     */
    private function clearTheActiveFormattingElementsUpToTheLastMarker() {
        while (!empty($this->a_formatting)) {
            /* 1. Let entry be the last (most recently added) entry in the list
            of active formatting elements.
            2. Remove entry from the list of active formatting elements. */
            $entry = array_pop($this->a_formatting);

            /* 3. If entry was a marker, then stop the algorithm at this point.
            The list has been cleared up to the last marker. */
            if ($entry === self::MARKER) {
                break;
            }
        }
    }
3421
3422
    /**
     * Generates implied end tags: pops the current node off the stack of
     * open elements for as long as it is a dc, dd, ds, dt, li, p, td, th or
     * tr element whose tag name is not listed in $exclude.
     *
     * Popping stops as soon as the stack is empty.
     *
     * NOTE(review): this tag list differs from the one in the HTML parsing
     * specification (which names option, optgroup, rp and rt, and does not
     * name td, th or tr). It is kept as-is because callers may rely on it;
     * confirm before changing.
     *
     * @param array $exclude Tag names that must not be closed implicitly.
     */
    private function generateImpliedEndTags($exclude = array()) {
        $elements = array_diff(array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

        // The emptiness check keeps end() from returning false, whose
        // ->tagName read would raise a warning once the stack is drained.
        while (!empty($this->stack) && in_array(end($this->stack)->tagName, $elements, true)) {
            array_pop($this->stack);
        }
    }
3438
3439
    /**
     * Classifies a node by looking its tag name up in the category lists.
     *
     * The lists are checked in the order special, scoping, formatting; a tag
     * name found in none of them is reported as phrasing.
     *
     * @param mixed $node Node whose tagName is inspected; a backtrace is
     *                    printed when it is not an object.
     * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING or
     *               self::PHRASING.
     */
    private function getElementCategory($node) {
        // Debugging aid: show who passed a non-object before it is used below.
        if (!is_object($node)) {
            debug_print_backtrace();
        }

        $tag = $node->tagName;

        if (in_array($tag, $this->special)) {
            return self::SPECIAL;
        }

        if (in_array($tag, $this->scoping)) {
            return self::SCOPING;
        }

        if (in_array($tag, $this->formatting)) {
            return self::FORMATTING;
        }

        return self::PHRASING;
    }
3458
3459
    /**
     * Pops the stack of open elements until the current node is one of the
     * given elements.
     *
     * @param array $elements Tag names at which popping stops.
     */
    private function clearStackToTableContext($elements) {
        /* When the steps above require the UA to clear the stack back to a
        table context, it means that the UA must, while the current node is not
        a table element or an html element, pop elements from the stack of open
        elements. */
        // The emptiness check bounds the loop: if none of $elements is on the
        // stack, popping would otherwise continue forever on an empty array.
        while ($this->stack && !in_array(end($this->stack)->tagName, $elements)) {
            array_pop($this->stack);
        }
    }
3477
3478
    /**
     * Resets the insertion mode appropriately.
     *
     * Walks the stack of open elements from the last entry towards the first
     * and sets $this->mode (and, for foreign content, $this->secondary_mode)
     * from the first node that matches one of the rules below.
     *
     * @param DOMElement|null $context Context element in the fragment case,
     *                                 null otherwise.
     */
    private function resetInsertionMode($context = null) {
        /* 1. Let last be false. */
        $last = false;

        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
            /* 2. Let node be the last node in the stack of open elements. */
            $node = $this->stack[$n];

            /* 3. If node is the first node in the stack of open elements, then
             * set last to true and set node to the context element. (fragment
             * case) */
            if ($this->stack[0]->isSameNode($node)) {
                $last = true;
                // Only substitute the context element when there is one.
                // Without a context the root node itself is examined, instead
                // of reading properties from null.
                if ($context !== null) {
                    $node = $context;
                }
            }

            $name = $node->tagName;

            /* 4. If node is a select element, then switch the insertion mode to
            "in select" and abort these steps. (fragment case) */
            if ($name === 'select') {
                $this->mode = self::IN_SELECT;
                break;

            /* 5. If node is a td or th element, then switch the insertion mode
            to "in cell" and abort these steps. */
            } elseif ($name === 'td' || $name === 'th') {
                $this->mode = self::IN_CELL;
                break;

            /* 6. If node is a tr element, then switch the insertion mode to
            "in row" and abort these steps. */
            } elseif ($name === 'tr') {
                $this->mode = self::IN_ROW;
                break;

            /* 7. If node is a tbody, thead, or tfoot element, then switch the
            insertion mode to "in table body" and abort these steps. */
            } elseif (in_array($name, array('tbody', 'thead', 'tfoot'))) {
                $this->mode = self::IN_TABLE_BODY;
                break;

            /* 8. If node is a caption element, then switch the insertion mode
            to "in caption" and abort these steps. */
            } elseif ($name === 'caption') {
                $this->mode = self::IN_CAPTION;
                break;

            /* 9. If node is a colgroup element, then switch the insertion mode
            to "in column group" and abort these steps. (innerHTML case) */
            } elseif ($name === 'colgroup') {
                $this->mode = self::IN_COLUMN_GROUP;
                break;

            /* 10. If node is a table element, then switch the insertion mode
            to "in table" and abort these steps. */
            } elseif ($name === 'table') {
                $this->mode = self::IN_TABLE;
                break;

            /* 11. If node is an element from the MathML namespace or the SVG
             * namespace, then switch the insertion mode to "in foreign
             * content", let the secondary insertion mode be "in body", and
             * abort these steps. */
            } elseif ($node->namespaceURI === self::NS_SVG ||
            $node->namespaceURI === self::NS_MATHML) {
                $this->mode = self::IN_FOREIGN_CONTENT;
                $this->secondary_mode = self::IN_BODY;
                break;

            /* 12. If node is a head element, then switch the insertion mode
            to "in body" ("in body"! not "in head"!) and abort these steps.
            (fragment case) */
            } elseif ($name === 'head') {
                $this->mode = self::IN_BODY;
                break;

            /* 13. If node is a body element, then switch the insertion mode to
            "in body" and abort these steps. */
            } elseif ($name === 'body') {
                $this->mode = self::IN_BODY;
                break;

            /* 14. If node is a frameset element, then switch the insertion
            mode to "in frameset" and abort these steps. (fragment case) */
            } elseif ($name === 'frameset') {
                $this->mode = self::IN_FRAMESET;
                break;

            /* 15. If node is an html element, then: if the head element
            pointer is null, switch the insertion mode to "before head",
            otherwise, switch the insertion mode to "after head". In either
            case, abort these steps. (fragment case) */
            } elseif ($name === 'html') {
                $this->mode = ($this->head_pointer === null)
                    ? self::BEFORE_HEAD
                    : self::AFTER_HEAD;

                break;

            /* 16. If last is true, then set the insertion mode to "in body"
            and abort these steps. (fragment case) */
            } elseif ($last) {
                $this->mode = self::IN_BODY;
                break;
            }
        }
    }
3588
3589
    /**
     * Closes the currently open table cell, if there is one.
     *
     * Checks for a td and then a th element in table scope and emits an end
     * tag token for the first one found.
     */
    private function closeCell() {
        /* If the stack of open elements has a td or th element in table scope,
        then act as if an end tag token with that tag name had been seen. */
        foreach (array('td', 'th') as $cell) {
            if (!$this->elementInScope($cell, self::SCOPE_TABLE)) {
                continue;
            }

            $endTag = array(
                'name' => $cell,
                'type' => HTML5_Tokenizer::ENDTAG
            );
            $this->emitToken($endTag);

            // Only one cell is closed per call.
            return;
        }
    }
3606
3607
    /**
     * Processes a token using the rules of the given insertion mode.
     *
     * Delegates to emitToken(), passing the mode as its second argument.
     *
     * @param mixed $token Token to process.
     * @param mixed $mode  Insertion mode whose rules should be applied.
     */
    private function processWithRulesFor($token, $mode) {
        /* "using the rules for the m insertion mode", where m is one of these
         * modes, the user agent must use the rules described under the m
         * insertion mode's section, but must leave the insertion mode
         * unchanged unless the rules in m themselves switch the insertion mode
         * to a new value. */
        $this->emitToken($token, $mode);
    }
3619
3620
    /**
     * Inserts an element for the token and switches to CDATA handling.
     *
     * The current insertion mode is stored in $this->original_mode before the
     * mode becomes self::IN_CDATA_RCDATA; the content model is set to
     * HTML5_Tokenizer::CDATA.
     *
     * @param mixed $token Token passed on to insertElement().
     */
    private function insertCDATAElement($token) {
        $this->insertElement($token);

        $this->content_model = HTML5_Tokenizer::CDATA;

        // Save the current mode first; the next line overwrites it.
        $this->original_mode = $this->mode;
        $this->mode = self::IN_CDATA_RCDATA;
    }
3629
3630
    /**
     * Inserts an element for the token and switches to RCDATA handling.
     *
     * The current insertion mode is stored in $this->original_mode before the
     * mode becomes self::IN_CDATA_RCDATA; the content model is set to
     * HTML5_Tokenizer::RCDATA.
     *
     * @param mixed $token Token passed on to insertElement().
     */
    private function insertRCDATAElement($token) {
        $this->insertElement($token);

        $this->content_model = HTML5_Tokenizer::RCDATA;

        // Save the current mode first; the next line overwrites it.
        $this->original_mode = $this->mode;
        $this->mode = self::IN_CDATA_RCDATA;
    }
3639
3640
    /**
     * Looks up the value of an attribute on a token.
     *
     * When several attributes share the requested name, the value of the last
     * one in the list is returned.
     *
     * @param array $token Token whose 'attr' list (name/value pairs) is searched.
     * @param mixed $key   Attribute name to look for (compared with ===).
     * @return mixed The attribute value, or false when the token has no 'attr'
     *               entry or no attribute with that name.
     */
    private function getAttr($token, $key) {
        if (!isset($token['attr'])) {
            return false;
        }

        $value = false;
        foreach ($token['attr'] as $pair) {
            if ($pair['name'] !== $key) {
                continue;
            }
            // No early exit: a later attribute with the same name wins.
            $value = $pair['value'];
        }

        return $value;
    }
3657
3658
    /**
     * Returns the current table.
     *
     * @return mixed The last entry of $this->stack whose tagName is 'table',
     *               or the first entry of the stack when there is none.
     */
    private function getCurrentTable() {
        /* The current table is the last table element in the stack of open
         * elements, if there is one. If there is no table element in the stack
         * of open elements (fragment case), then the current table is the
         * first element in the stack of open elements (the html element). */
        $index = count($this->stack);

        // Scan from the most recently opened element towards the root.
        while ($index-- > 0) {
            $candidate = $this->stack[$index];
            if ($candidate->tagName === 'table') {
                return $candidate;
            }
        }

        return $this->stack[0];
    }
3673
3674
    /**
     * Determines the foster parent element.
     *
     * @return mixed The parent element of the last table on the stack; the
     *               first stack entry when no table is open; otherwise the
     *               stack entry just before the table, or null when the table
     *               is itself the first entry.
     */
    private function getFosterParent() {
        /* The foster parent element is the parent element of the last
        table element in the stack of open elements, if there is a
        table element and it has such a parent element. If there is no
        table element in the stack of open elements (innerHTML case),
        then the foster parent element is the first element in the
        stack of open elements (the html element). Otherwise, if there
        is a table element in the stack of open elements, but the last
        table element in the stack of open elements has no parent, or
        its parent node is not an element, then the foster parent
        element is the element before the last table element in the
        stack of open elements. */
        $table = null;
        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
            if ($this->stack[$n]->tagName === 'table') {
                $table = $this->stack[$n];
                break;
            }
        }

        // No table on the stack: the first element of the stack is used.
        if ($table === null) {
            return $this->stack[0];
        }

        // The parent only qualifies when it is an element node. Checking the
        // node type here makes the fallback below reachable for non-element
        // parents, as the text above requires.
        $parent = $table->parentNode;
        if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
            return $parent;
        }

        // Table without a usable parent: take the entry before it on the
        // stack. There is no such entry when the table sits at index 0.
        return $n > 0 ? $this->stack[$n - 1] : null;
    }
3709
3710
    /**
     * Inserts a node at the foster parent position.
     *
     * The node is inserted directly before the current table when the foster
     * parent is that table's parent node; in every other case it is appended
     * to the foster parent.
     *
     * @param DOMNode $node Node to insert.
     */
    public function fosterParent($node) {
        $foster_parent = $this->getFosterParent();
        $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
        /* When a node node is to be foster parented, the node node must be
         * inserted into the foster parent element. */
        /* If the foster parent element is the parent element of the last table
         * element in the stack of open elements, then node must be inserted
         * immediately before the last table element in the stack of open
         * elements in the foster parent element; otherwise, node must be
         * appended to the foster parent element. */
        $table_parent = $table->parentNode;

        // A table without a parent cannot be the insertion reference, so the
        // null check routes that case to appendChild() instead of calling
        // isSameNode() on null.
        if ($table->tagName === 'table'
            && $table_parent !== null
            && $table_parent->isSameNode($foster_parent)
        ) {
            $foster_parent->insertBefore($node, $table);
        } else {
            $foster_parent->appendChild($node);
        }
    }
3729
3730
    /**
     * Debugging helper: echoes the tag names on the stack of open elements,
     * comma-separated, on a single line.
     */
    private function printStack() {
        $tags = array();
        foreach ($this->stack as $open) {
            $tags[] = $open->tagName;
        }

        $line = "  -> stack [" . implode(', ', $tags) . "]\n";
        echo $line;
    }
3740
3741
    /**
     * Debugging helper: echoes the list of active formatting elements.
     *
     * Marker entries are shown as "MARKER", every other entry by its tag
     * name. Nothing is printed when the list is empty.
     */
    private function printActiveFormattingElements() {
        if (!$this->a_formatting) {
            return;
        }

        $labels = array();
        foreach ($this->a_formatting as $entry) {
            $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
        }

        $line = "  -> active formatting [" . implode(', ', $labels) . "]\n";
        echo $line;
    }
3758
3759
    /**
     * Reports whether the current table carries a non-empty "tainted" property.
     *
     * @return bool True when getCurrentTable()->tainted is set and not empty.
     */
    public function currentTableIsTainted() {
        $table = $this->getCurrentTable();

        return !empty($table->tainted);
    }
3765
3766
    /**
     * Sets up the tree constructor for building a fragment.
     *
     * Always flags the builder as being in fragment mode. When a context tag
     * name is given, a context element is created from it and used to pick
     * the content model, create the root html element, reset the insertion
     * mode and locate the form element pointer.
     *
     * @param string|null $context Tag name of the context element, if any.
     */
    public function setupContext($context = null) {
        $this->fragment = true;

        if (!$context) {
            return;
        }

        $element = $this->dom->createElementNS(self::NS_HTML, $context);
        $tag = $element->tagName;

        /* 4.1. Set the HTML parser's tokenization stage's content model
         * flag according to the context element, as follows: */
        if ($tag === 'title' || $tag === 'textarea') {
            $this->content_model = HTML5_Tokenizer::RCDATA;
        } elseif (in_array($tag, array('style', 'script', 'xmp', 'iframe', 'noembed', 'noframes'), true)) {
            $this->content_model = HTML5_Tokenizer::CDATA;
        } elseif ($tag === 'noscript') {
            // XSCRIPT: assuming scripting is enabled
            $this->content_model = HTML5_Tokenizer::CDATA;
        } elseif ($tag === 'plaintext') {
            $this->content_model = HTML5_Tokenizer::PLAINTEXT;
        }

        /* 4.2. Let root be a new html element with no attributes. */
        $this->root = $this->dom->createElementNS(self::NS_HTML, 'html');

        /* 4.3 Append the element root to the Document node created above. */
        $this->dom->appendChild($this->root);

        /* 4.4 Set up the parser's stack of open elements so that it
         * contains just the single element root. */
        $this->stack = array($this->root);

        /* 4.5 Reset the parser's insertion mode appropriately. */
        $this->resetInsertionMode($element);

        /* 4.6 Set the parser's form element pointer to the nearest node
         * to the context element that is a form element (going straight up
         * the ancestor chain, and including the element itself, if it is a
         * form element), or, if there is no such form element, to null. */
        for ($ancestor = $element; $ancestor; $ancestor = $ancestor->parentNode) {
            if ($ancestor->tagName === 'form') {
                $this->form_pointer = $ancestor;
                break;
            }
        }
    }
3816
3817
    /**
     * Restores the mixed-case spelling of the MathML definitionURL attribute.
     *
     * @param array $token Token whose 'attr' list holds name/value pairs.
     * @return array The token, with any attribute named 'definitionurl'
     *               renamed to 'definitionURL'. A token without an 'attr'
     *               array is returned unchanged.
     */
    public function adjustMathMLAttributes($token) {
        // Nothing to adjust; also avoids iterating a missing 'attr' entry.
        if (!isset($token['attr']) || !is_array($token['attr'])) {
            return $token;
        }

        // Write back through the index instead of a by-reference loop
        // variable, so no reference is left behind in the returned array.
        foreach ($token['attr'] as $i => $kp) {
            if ($kp['name'] === 'definitionurl') {
                $token['attr'][$i]['name'] = 'definitionURL';
            }
        }

        return $token;
    }
3829
3830
    /**
     * Restores the mixed-case spelling of SVG attribute names.
     *
     * @param array $token Token whose 'attr' list holds name/value pairs.
     * @return array The token, with every attribute name found in the lookup
     *               table replaced by its mixed-case form. A token without an
     *               'attr' array is returned unchanged.
     */
    public function adjustSVGAttributes($token) {
        static $lookup = array(
            'attributename' => 'attributeName',
            'attributetype' => 'attributeType',
            'basefrequency' => 'baseFrequency',
            'baseprofile' => 'baseProfile',
            'calcmode' => 'calcMode',
            'clippathunits' => 'clipPathUnits',
            'contentscripttype' => 'contentScriptType',
            'contentstyletype' => 'contentStyleType',
            'diffuseconstant' => 'diffuseConstant',
            'edgemode' => 'edgeMode',
            'externalresourcesrequired' => 'externalResourcesRequired',
            'filterres' => 'filterRes',
            'filterunits' => 'filterUnits',
            'glyphref' => 'glyphRef',
            'gradienttransform' => 'gradientTransform',
            'gradientunits' => 'gradientUnits',
            'kernelmatrix' => 'kernelMatrix',
            'kernelunitlength' => 'kernelUnitLength',
            'keypoints' => 'keyPoints',
            'keysplines' => 'keySplines',
            'keytimes' => 'keyTimes',
            'lengthadjust' => 'lengthAdjust',
            'limitingconeangle' => 'limitingConeAngle',
            'markerheight' => 'markerHeight',
            'markerunits' => 'markerUnits',
            'markerwidth' => 'markerWidth',
            'maskcontentunits' => 'maskContentUnits',
            'maskunits' => 'maskUnits',
            'numoctaves' => 'numOctaves',
            'pathlength' => 'pathLength',
            'patterncontentunits' => 'patternContentUnits',
            'patterntransform' => 'patternTransform',
            'patternunits' => 'patternUnits',
            'pointsatx' => 'pointsAtX',
            'pointsaty' => 'pointsAtY',
            'pointsatz' => 'pointsAtZ',
            'preservealpha' => 'preserveAlpha',
            'preserveaspectratio' => 'preserveAspectRatio',
            'primitiveunits' => 'primitiveUnits',
            'refx' => 'refX',
            'refy' => 'refY',
            'repeatcount' => 'repeatCount',
            'repeatdur' => 'repeatDur',
            'requiredextensions' => 'requiredExtensions',
            'requiredfeatures' => 'requiredFeatures',
            'specularconstant' => 'specularConstant',
            'specularexponent' => 'specularExponent',
            'spreadmethod' => 'spreadMethod',
            'startoffset' => 'startOffset',
            'stddeviation' => 'stdDeviation',
            'stitchtiles' => 'stitchTiles',
            'surfacescale' => 'surfaceScale',
            'systemlanguage' => 'systemLanguage',
            'tablevalues' => 'tableValues',
            'targetx' => 'targetX',
            'targety' => 'targetY',
            'textlength' => 'textLength',
            'viewbox' => 'viewBox',
            'viewtarget' => 'viewTarget',
            'xchannelselector' => 'xChannelSelector',
            'ychannelselector' => 'yChannelSelector',
            'zoomandpan' => 'zoomAndPan',
        );

        // Nothing to adjust; also avoids iterating a missing 'attr' entry.
        if (!isset($token['attr']) || !is_array($token['attr'])) {
            return $token;
        }

        // Write back through the index instead of a by-reference loop
        // variable, so no reference is left behind in the returned array.
        foreach ($token['attr'] as $i => $kp) {
            // Only string names can be table keys; a name that is already an
            // array is left untouched.
            if (is_string($kp['name']) && isset($lookup[$kp['name']])) {
                $token['attr'][$i]['name'] = $lookup[$kp['name']];
            }
        }

        return $token;
    }
3906
3907
    /**
     * Replaces namespaced attribute names with their namespace description.
     *
     * @param array $token Token whose 'attr' list holds name/value pairs.
     * @return array The token, with every attribute name found in the lookup
     *               table replaced by array(prefix, local name, namespace
     *               URI). A token without an 'attr' array is returned
     *               unchanged.
     */
    public function adjustForeignAttributes($token) {
        static $lookup = array(
            'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
            'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
            'xlink:href' => array('xlink', 'href', self::NS_XLINK),
            'xlink:role' => array('xlink', 'role', self::NS_XLINK),
            'xlink:show' => array('xlink', 'show', self::NS_XLINK),
            'xlink:title' => array('xlink', 'title', self::NS_XLINK),
            'xlink:type' => array('xlink', 'type', self::NS_XLINK),
            'xml:base' => array('xml', 'base', self::NS_XML),
            'xml:lang' => array('xml', 'lang', self::NS_XML),
            'xml:space' => array('xml', 'space', self::NS_XML),
            'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
            'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
        );

        // Nothing to adjust; also avoids iterating a missing 'attr' entry.
        if (!isset($token['attr']) || !is_array($token['attr'])) {
            return $token;
        }

        // Write back through the index instead of a by-reference loop
        // variable, so no reference is left behind in the returned array.
        foreach ($token['attr'] as $i => $kp) {
            // Names already replaced by an array (for example on a second
            // pass) are not valid table keys and are left untouched.
            if (is_string($kp['name']) && isset($lookup[$kp['name']])) {
                $token['attr'][$i]['name'] = $lookup[$kp['name']];
            }
        }

        return $token;
    }
3933
3934
    /**
     * Creates an element in the given namespace, copies the token's
     * attributes onto it, appends it via appendToRealParent() and pushes it
     * onto the stack of open elements.
     *
     * @param array $token        Token with a 'name' and an optional 'attr'
     *                            list; an attribute name may be a string or an
     *                            array whose index 1 is the local name and
     *                            index 2 the namespace URI.
     * @param mixed $namespaceURI Namespace passed to createElementNS().
     */
    public function insertForeignElement($token, $namespaceURI) {
        $el = $this->dom->createElementNS($namespaceURI, $token['name']);

        $pairs = !empty($token['attr']) ? $token['attr'] : array();
        foreach ($pairs as $kp) {
            if (is_array($kp['name'])) {
                $ns = $kp['name'][2];
                $attr = $kp['name'][1];
            } else {
                $ns = self::NS_HTML;
                $attr = $kp['name'];
            }

            // An attribute that is already present is not overwritten.
            if ($el->hasAttributeNS($ns, $attr)) {
                continue;
            }

            // XSKETCHY: work around godawful libxml bug
            if ($ns === self::NS_XLINK) {
                $el->setAttribute('xlink:'.$attr, $kp['value']);
            } elseif ($ns === self::NS_HTML) {
                // Another godawful libxml bug
                $el->setAttribute($attr, $kp['value']);
            } else {
                $el->setAttributeNS($ns, $attr, $kp['value']);
            }
        }

        $this->appendToRealParent($el);
        $this->stack[] = $el;
        // XERROR: see below
        /* If the newly created element has an xmlns attribute in the XMLNS
         * namespace whose value is not exactly the same as the element's
         * namespace, that is a parse error. Similarly, if the newly created
         * element has an xmlns:xlink attribute in the XMLNS namespace whose
         * value is not the XLink Namespace, that is a parse error. */
    }
3972
3973
    /**
     * Normalizes the document and returns the parse result.
     *
     * @return DOMDocument|DOMNodeList The document itself when not in fragment
     *                                 mode; otherwise the child nodes of the
     *                                 fragment root, or of the document when
     *                                 no root is set.
     */
    public function save() {
        $this->dom->normalize();

        if (!$this->fragment) {
            return $this->dom;
        }

        $container = $this->root ? $this->root : $this->dom;

        return $container->childNodes;
    }
3988
}
3989
3990