| Total Complexity | 569 |
| Total Lines | 2746 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like HTML5TreeConstructer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HTML5TreeConstructer, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 1142 | class HTML5TreeConstructer |
||
| 1143 | { |
||
/** Stack of open elements; new elements are pushed onto the end. */
public $stack = array();

/** Current tree-construction phase (one of the *_PHASE constants). */
private $phase;

/** Current insertion mode used while in the main phase. */
private $mode;

/** The DOMDocument that the tree is built into. */
private $dom;

/** Foster-parent node; initially null. */
private $foster_parent = null;

/** List of active formatting elements; entries may also be self::MARKER. */
private $a_formatting = array();

/** The head element pointer; null until a head element is inserted. */
private $head_pointer = null;

/** The form element pointer; null until a form element is inserted. */
private $form_pointer = null;

/** Tag names treated as "scoping" elements. */
private $scoping = array(
    'button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th'
);

/** Tag names treated as "formatting" elements. */
private $formatting = array(
    'a', 'b', 'big', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike',
    'strong', 'tt', 'u'
);

/** Tag names treated as "special" elements. */
private $special = array(
    'address', 'area', 'base', 'basefont', 'bgsound', 'blockquote', 'body',
    'br', 'center', 'col', 'colgroup', 'dd', 'dir', 'div', 'dl', 'dt',
    'embed', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'head', 'hr', 'iframe', 'image', 'img', 'input',
    'isindex', 'li', 'link', 'listing', 'menu', 'meta', 'noembed',
    'noframes', 'noscript', 'ol', 'optgroup', 'option', 'p', 'param',
    'plaintext', 'pre', 'script', 'select', 'spacer', 'style', 'tbody',
    'textarea', 'tfoot', 'thead', 'title', 'tr', 'ul', 'wbr'
);

// Tree-construction phases (values of $phase).
const INIT_PHASE = 0;
const ROOT_PHASE = 1;
const MAIN_PHASE = 2;
const END_PHASE = 3;

// Insertion modes of the main phase (values of $mode).
// NOTE: these share the same integer range as the *_PHASE constants, so a
// phase constant and a mode constant can be numerically equal.
const BEFOR_HEAD = 0;
const IN_HEAD = 1;
const AFTER_HEAD = 2;
const IN_BODY = 3;
const IN_TABLE = 4;
const IN_CAPTION = 5;
const IN_CGROUP = 6;
const IN_TBODY = 7;
const IN_ROW = 8;
const IN_CELL = 9;
const IN_SELECT = 10;
const AFTER_BODY = 11;
const IN_FRAME = 12;
const AFTR_FRAME = 13;

// Element categories.
const SPECIAL = 0;
const SCOPING = 1;
const FORMATTING = 2;
const PHRASING = 3;

// Sentinel stored in the list of active formatting elements.
const MARKER = 0;
||
| 1194 | |||
/**
 * Puts the tree constructor into its starting state: initial phase,
 * "before head" insertion mode, and a fresh, empty UTF-8 DOMDocument.
 */
public function __construct()
{
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    $document->strictErrorChecking = false;

    $this->dom = $document;
    $this->mode = self::BEFOR_HEAD;
    $this->phase = self::INIT_PHASE;
}
||
| 1206 | |||
| 1207 | // Process tag tokens |
||
/**
 * Routes a token to the handler for the current tree-construction phase.
 *
 * @param array $token Token array; handlers read keys such as 'type',
 *                     'name', 'data' and 'attr'.
 * @return mixed Whatever the phase handler returns; null when the current
 *               phase matches none of the known phases.
 */
public function emitToken($token)
{
    switch ($this->phase) {
        case self::INIT_PHASE:
            return $this->initPhase($token);

        case self::ROOT_PHASE:
            return $this->rootElementPhase($token);

        case self::MAIN_PHASE:
            return $this->mainPhase($token);

        case self::END_PHASE:
            return $this->trailingEndPhase($token);
    }
}
||
| 1217 | |||
/**
 * Handles a token while the parser is still in the initial phase.
 *
 * - An erroneous DOCTYPE, a comment, a start tag, an end tag, EOF, or a
 *   character token that is not pure whitespace switches to the root
 *   element phase and is reprocessed there.
 * - A correct DOCTYPE only switches to the root element phase.
 * - A whitespace-only character token is appended to the document as a
 *   text node.
 *
 * @param array $token Token array ('type', optionally 'error' and 'data').
 * @return mixed Result of rootElementPhase() when the token is
 *               reprocessed, otherwise null.
 */
private function initPhase($token)
{
    // U+0009 TAB, U+000A LF, U+000B VT, U+000C FF, U+0020 SPACE only.
    $whitespaceOnly = '/^[\t\n\x0b\x0c ]+$/';

    $reprocessInRootPhase =
        (isset($token['error']) && $token['error'])
        || $token['type'] === HTML5::COMMENT
        || $token['type'] === HTML5::STARTTAG
        || $token['type'] === HTML5::ENDTAG
        || $token['type'] === HTML5::EOF
        || ($token['type'] === HTML5::CHARACTR
            && isset($token['data'])
            && !preg_match($whitespaceOnly, $token['data']));

    if ($reprocessInRootPhase) {
        /* The specification does not define how to handle this case;
        this implementation simply carries on in the root element phase. */
        $this->phase = self::ROOT_PHASE;
        return $this->rootElementPhase($token);
    }

    /* A DOCTYPE token marked as being correct. */
    if (isset($token['error']) && !$token['error']) {
        /* NOTE(review): the specification asks for a DocumentType node to
        be appended to the Document here. This implementation does not
        attach one; it only advances to the root element phase. The
        previously constructed, never-used DOMDocumentType object has
        been dropped because it had no effect on the document. */
        $this->phase = self::ROOT_PHASE;
        return null;
    }

    /* A whitespace-only character token: append it to the Document. */
    if (isset($token['data']) && preg_match($whitespaceOnly, $token['data'])) {
        $this->dom->appendChild($this->dom->createTextNode($token['data']));
    }

    return null;
}
||
| 1268 | |||
/**
 * Handles a token in the root element phase, i.e. before the html
 * element exists.
 *
 * DOCTYPE tokens are ignored, comments and whitespace-only character
 * tokens are appended directly to the document, and any other character,
 * start tag, end tag or EOF token causes the html element to be created
 * and the token to be reprocessed in the main phase.
 *
 * @param array $token Token array ('type', optionally 'data').
 * @return mixed Result of mainPhase() when the token is reprocessed,
 *               otherwise null.
 */
private function rootElementPhase($token)
{
    $type = $token['type'];

    /* A DOCTYPE token: parse error, ignore. */
    if ($type === HTML5::DOCTYPE) {
        return null;
    }

    /* A comment token: append a Comment node to the Document. */
    if ($type === HTML5::COMMENT) {
        $this->dom->appendChild($this->dom->createComment($token['data']));
        return null;
    }

    /* A character token consisting only of TAB, LF, VT, FF or SPACE:
    append it to the Document as text. */
    if ($type === HTML5::CHARACTR
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->dom->appendChild($this->dom->createTextNode($token['data']));
        return null;
    }

    /* Any remaining character token is necessarily not whitespace-only
    (that case returned above), so it joins start tags, end tags and EOF. */
    $createsRoot = $type === HTML5::CHARACTR
        || $type === HTML5::STARTTAG
        || $type === HTML5::ENDTAG
        || $type === HTML5::EOF;

    if (!$createsRoot) {
        return null;
    }

    /* Create the html element, append it to the Document, push it onto
    the stack of open elements, then switch to the main phase and
    reprocess the current token. */
    $root = $this->dom->createElement('html');
    $this->dom->appendChild($root);
    $this->stack[] = $root;

    $this->phase = self::MAIN_PHASE;
    return $this->mainPhase($token);
}
||
| 1316 | |||
/**
 * Handles a token in the main phase.
 *
 * DOCTYPE tokens are ignored, a repeated <html> start tag only
 * contributes attributes that the root element does not have yet, EOF
 * generates implied end tags, and every other token is dispatched to the
 * handler of the current insertion mode.
 *
 * @param array $token Token array ('type', and 'name'/'attr' for tags).
 * @return mixed Result of the insertion-mode handler, or null when the
 *               token is consumed here.
 */
private function mainPhase($token)
{
    /* A DOCTYPE token: parse error, ignore. */
    if ($token['type'] === HTML5::DOCTYPE) {
        return null;
    }

    /* A start tag token with the tag name "html". */
    if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
        /* For each attribute on the token, add it to the first element of
        the stack of open elements unless it is already present there. */
        $root = $this->stack[0];

        foreach ($token['attr'] as $attr) {
            if (!$root->hasAttribute($attr['name'])) {
                $root->setAttribute($attr['name'], $attr['value']);
            }
        }

        return null;
    }

    /* An end-of-file token: generate implied end tags. */
    if ($token['type'] === HTML5::EOF) {
        $this->generateImpliedEndTags();
        return null;
    }

    /* Anything else depends on the insertion mode.
    The former `case self::END_PHASE` has been removed: END_PHASE is a
    phase constant with the same value (3) as IN_BODY, so in a switch over
    $this->mode it could never be selected. */
    switch ($this->mode) {
        case self::BEFOR_HEAD:
            return $this->beforeHead($token);

        case self::IN_HEAD:
            return $this->inHead($token);

        case self::AFTER_HEAD:
            return $this->afterHead($token);

        case self::IN_BODY:
            return $this->inBody($token);

        case self::IN_TABLE:
            return $this->inTable($token);

        case self::IN_CAPTION:
            return $this->inCaption($token);

        case self::IN_CGROUP:
            return $this->inColumnGroup($token);

        case self::IN_TBODY:
            return $this->inTableBody($token);

        case self::IN_ROW:
            return $this->inRow($token);

        case self::IN_CELL:
            return $this->inCell($token);

        case self::IN_SELECT:
            return $this->inSelect($token);

        case self::AFTER_BODY:
            return $this->afterBody($token);

        case self::IN_FRAME:
            return $this->inFrameset($token);

        case self::AFTR_FRAME:
            return $this->afterFrameset($token);
    }

    return null;
}
||
| 1367 | |||
/**
 * Handles a token in the "before head" insertion mode.
 *
 * @param array $token Token array ('type', plus 'data' for character and
 *                     comment tokens, 'name'/'attr' for tags).
 * @return mixed Result of inHead() when the token is reprocessed there,
 *               otherwise null.
 */
private function beforeHead($token)
{
    $type = $token['type'];
    $isCharacter = $type === HTML5::CHARACTR;

    /* True only for character tokens made up entirely of TAB, LF, VT, FF
    or SPACE. Evaluated once so every branch uses the same pattern (the
    later branch previously used a variant without the "+" quantifier). */
    $isWhitespace = $isCharacter
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']);

    if ($isWhitespace) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'head') {
        /* Create an element for the token, append it to the current node
        and push it onto the stack of open elements; remember it as the
        head element pointer and switch to "in head". */
        $this->head_pointer = $this->insertElement($token);
        $this->mode = self::IN_HEAD;

    } elseif ($type === HTML5::STARTTAG
        || ($type === HTML5::ENDTAG && $token['name'] === 'html')
        || ($isCharacter && !$isWhitespace)
    ) {
        /* Any other start tag, an "html" end tag, or a non-whitespace
        character token: act as if a start tag token with the tag name
        "head" and no attributes had been seen, then reprocess the
        current token. */
        $this->beforeHead(array(
            'name' => 'head',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        ));

        return $this->inHead($token);
    }

    /* Any other end tag (and any token type not listed above) is a parse
    error and is ignored. */
    return null;
}
||
| 1422 | |||
/**
 * Handles a token in the "in head" insertion mode.
 *
 * Also invoked from other modes to process head-only start tags. The head
 * element pointer may therefore be null (innerHTML case); every use of it
 * here is guarded accordingly.
 *
 * @param array $token Token array ('type', plus 'data' for character and
 *                     comment tokens, 'name'/'attr' for tags).
 * @return mixed A content-model constant (HTML5::PCDATA, HTML5::RCDATA or
 *               HTML5::CDATA) when the tokeniser has to switch state, the
 *               result of afterHead() when the token is reprocessed
 *               there, otherwise null.
 */
private function inHead($token)
{
    $type = $token['type'];
    $name = isset($token['name']) ? $token['name'] : null;
    $rawTextTags = array('title', 'style', 'script');

    /* A character token that is whitespace only (TAB, LF, VT, FF, SPACE).

    THIS DIFFERS FROM THE SPEC: if the current node is a title, style or
    script element, the character data is appended to it regardless of
    its content. */
    if ($type === HTML5::CHARACTR
        && (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
            || in_array(end($this->stack)->nodeName, $rawTextTags, true))
    ) {
        $this->insertText($token['data']);
        return null;
    }

    /* A comment token: append a Comment node to the current node. */
    if ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return null;
    }

    /* End tag of title/style/script: close the element and hand the
    tokeniser back to PCDATA. */
    if ($type === HTML5::ENDTAG && in_array($name, $rawTextTags, true)) {
        array_pop($this->stack);
        return HTML5::PCDATA;
    }

    if ($type === HTML5::STARTTAG) {
        /* A start tag with the tag name "title": insert it and switch the
        tokeniser's content model flag to RCDATA. */
        if ($name === 'title') {
            $this->appendToHeadOrCurrentNode($token);
            return HTML5::RCDATA;
        }

        /* A start tag with the tag name "style" or "script": insert it
        and switch the tokeniser's content model flag to CDATA. The script
        case now honours a null head element pointer like its siblings
        instead of dereferencing it unconditionally. */
        if ($name === 'style' || $name === 'script') {
            $this->appendToHeadOrCurrentNode($token);
            return HTML5::CDATA;
        }

        /* A start tag with the tag name "base", "link", or "meta". */
        if (in_array($name, array('base', 'link', 'meta'), true)) {
            $this->appendToHeadOrCurrentNode($token);

            /* NOTE(review): the stack is popped only when a head element
            pointer exists, exactly as before; confirm against
            insertElement() whether the null case should pop as well. */
            if ($this->head_pointer !== null) {
                array_pop($this->stack);
            }

            return null;
        }
    }

    /* An end tag with the tag name "head". */
    if ($type === HTML5::ENDTAG && $name === 'head') {
        /* If the current node is the head element, pop it off the stack
        of open elements; otherwise this is a parse error. */
        if ($this->headIsCurrentNode()) {
            array_pop($this->stack);
        }

        /* Change the insertion mode to "after head". */
        $this->mode = self::AFTER_HEAD;
        return null;
    }

    /* A start tag with the tag name "head" or an end tag except "html":
    parse error, ignore the token. */
    if (($type === HTML5::STARTTAG && $name === 'head')
        || ($type === HTML5::ENDTAG && $name !== 'html')
    ) {
        return null;
    }

    /* Anything else: if the current node is the head element, act as if
    an end tag token with the tag name "head" had been seen; otherwise
    just change the insertion mode to "after head". */
    if ($this->headIsCurrentNode()) {
        $this->inHead(array(
            'name' => 'head',
            'type' => HTML5::ENDTAG
        ));
    } else {
        $this->mode = self::AFTER_HEAD;
    }

    /* Then, reprocess the current token. */
    return $this->afterHead($token);
}

/**
 * Creates an element for the token and attaches it to the node the head
 * element pointer refers to, or, when that pointer is null (innerHTML
 * case), inserts it at the current node.
 *
 * Presumably a false second argument makes insertElement() skip its own
 * attachment step; verify against insertElement().
 *
 * @param array $token Start tag token.
 * @return mixed The element returned by insertElement().
 */
private function appendToHeadOrCurrentNode($token)
{
    if ($this->head_pointer === null) {
        return $this->insertElement($token);
    }

    $element = $this->insertElement($token, false);
    $this->head_pointer->appendChild($element);

    return $element;
}

/**
 * Tells whether the head element pointer is set and refers to the last
 * element on the stack of open elements.
 *
 * @return bool
 */
private function headIsCurrentNode()
{
    return $this->head_pointer !== null
        && $this->head_pointer->isSameNode(end($this->stack));
}
||
| 1547 | |||
/**
 * Handles a token in the "after head" insertion mode.
 *
 * @param array $token Token array ('type', plus 'data' for character and
 *                     comment tokens, 'name'/'attr' for tags).
 * @return mixed Result of inHead() or inBody() when the token is
 *               reprocessed there, otherwise null.
 */
private function afterHead($token)
{
    $type = $token['type'];

    /* A character token made up only of TAB, LF, VT, FF or SPACE:
    append the character to the current node. */
    if ($type === HTML5::CHARACTR
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);
        return null;
    }

    /* A comment token: append a Comment node to the current node. */
    if ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return null;
    }

    if ($type === HTML5::STARTTAG) {
        $tag = $token['name'];

        /* "body": insert the element and switch to "in body". */
        if ($tag === 'body') {
            $this->insertElement($token);
            $this->mode = self::IN_BODY;
            return null;
        }

        /* "frameset": insert the element and switch to "in frameset". */
        if ($tag === 'frameset') {
            $this->insertElement($token);
            $this->mode = self::IN_FRAME;
            return null;
        }

        /* Head-only elements: parse error. Switch the insertion mode
        back to "in head" and reprocess the token. */
        $headOnly = array('base', 'link', 'meta', 'script', 'style', 'title');

        if (in_array($tag, $headOnly)) {
            $this->mode = self::IN_HEAD;
            return $this->inHead($token);
        }
    }

    /* Anything else: act as if a start tag token with the tag name "body"
    and no attributes had been seen, then reprocess the current token. */
    $impliedBody = array(
        'name' => 'body',
        'type' => HTML5::STARTTAG,
        'attr' => array()
    );
    $this->afterHead($impliedBody);

    return $this->inBody($token);
}
||
| 1604 | |||
| 1605 | private function inBody($token) |
||
| 1606 | { |
||
| 1607 | /* Handle the token as follows: */ |
||
| 1608 | |||
| 1609 | switch($token['type']) { |
||
| 1610 | /* A character token */ |
||
| 1611 | case HTML5::CHARACTR: |
||
| 1612 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1613 | $this->reconstructActiveFormattingElements(); |
||
| 1614 | |||
| 1615 | /* Append the token's character to the current node. */ |
||
| 1616 | $this->insertText($token['data']); |
||
| 1617 | break; |
||
| 1618 | |||
| 1619 | /* A comment token */ |
||
| 1620 | case HTML5::COMMENT: |
||
| 1621 | /* Append a Comment node to the current node with the data |
||
| 1622 | attribute set to the data given in the comment token. */ |
||
| 1623 | $this->insertComment($token['data']); |
||
| 1624 | break; |
||
| 1625 | |||
| 1626 | case HTML5::STARTTAG: |
||
| 1627 | switch($token['name']) { |
||
| 1628 | /* A start tag token whose tag name is one of: "script", |
||
| 1629 | "style" */ |
||
| 1630 | case 'script': case 'style': |
||
| 1631 | /* Process the token as if the insertion mode had been "in |
||
| 1632 | head". */ |
||
| 1633 | return $this->inHead($token); |
||
| 1634 | break; |
||
| 1635 | |||
| 1636 | /* A start tag token whose tag name is one of: "base", "link", |
||
| 1637 | "meta", "title" */ |
||
| 1638 | case 'base': case 'link': case 'meta': case 'title': |
||
| 1639 | /* Parse error. Process the token as if the insertion mode |
||
| 1640 | had been "in head". */ |
||
| 1641 | return $this->inHead($token); |
||
| 1642 | break; |
||
| 1643 | |||
| 1644 | /* A start tag token with the tag name "body" */ |
||
| 1645 | case 'body': |
||
| 1646 | /* Parse error. If the second element on the stack of open |
||
| 1647 | elements is not a body element, or, if the stack of open |
||
| 1648 | elements has only one node on it, then ignore the token. |
||
| 1649 | (innerHTML case) */ |
||
| 1650 | if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { |
||
| 1651 | // Ignore |
||
| 1652 | |||
| 1653 | /* Otherwise, for each attribute on the token, check to see |
||
| 1654 | if the attribute is already present on the body element (the |
||
| 1655 | second element) on the stack of open elements. If it is not, |
||
| 1656 | add the attribute and its corresponding value to that |
||
| 1657 | element. */ |
||
| 1658 | } else { |
||
| 1659 | foreach($token['attr'] as $attr) { |
||
| 1660 | if(!$this->stack[1]->hasAttribute($attr['name'])) { |
||
| 1661 | $this->stack[1]->setAttribute($attr['name'], $attr['value']); |
||
| 1662 | } |
||
| 1663 | } |
||
| 1664 | } |
||
| 1665 | break; |
||
| 1666 | |||
| 1667 | /* A start tag whose tag name is one of: "address", |
||
| 1668 | "blockquote", "center", "dir", "div", "dl", "fieldset", |
||
| 1669 | "listing", "menu", "ol", "p", "ul" */ |
||
| 1670 | case 'address': case 'blockquote': case 'center': case 'dir': |
||
| 1671 | case 'div': case 'dl': case 'fieldset': case 'listing': |
||
| 1672 | case 'menu': case 'ol': case 'p': case 'ul': |
||
| 1673 | /* If the stack of open elements has a p element in scope, |
||
| 1674 | then act as if an end tag with the tag name p had been |
||
| 1675 | seen. */ |
||
| 1676 | if($this->elementInScope('p')) { |
||
| 1677 | $this->emitToken(array( |
||
| 1678 | 'name' => 'p', |
||
| 1679 | 'type' => HTML5::ENDTAG |
||
| 1680 | )); |
||
| 1681 | } |
||
| 1682 | |||
| 1683 | /* Insert an HTML element for the token. */ |
||
| 1684 | $this->insertElement($token); |
||
| 1685 | break; |
||
| 1686 | |||
| 1687 | /* A start tag whose tag name is "form" */ |
||
| 1688 | case 'form': |
||
| 1689 | /* If the form element pointer is not null, ignore the |
||
| 1690 | token with a parse error. */ |
||
| 1691 | if($this->form_pointer !== null) { |
||
| 1692 | // Ignore. |
||
| 1693 | |||
| 1694 | /* Otherwise: */ |
||
| 1695 | } else { |
||
| 1696 | /* If the stack of open elements has a p element in |
||
| 1697 | scope, then act as if an end tag with the tag name p |
||
| 1698 | had been seen. */ |
||
| 1699 | if($this->elementInScope('p')) { |
||
| 1700 | $this->emitToken(array( |
||
| 1701 | 'name' => 'p', |
||
| 1702 | 'type' => HTML5::ENDTAG |
||
| 1703 | )); |
||
| 1704 | } |
||
| 1705 | |||
| 1706 | /* Insert an HTML element for the token, and set the |
||
| 1707 | form element pointer to point to the element created. */ |
||
| 1708 | $element = $this->insertElement($token); |
||
| 1709 | $this->form_pointer = $element; |
||
| 1710 | } |
||
| 1711 | break; |
||
| 1712 | |||
| 1713 | /* A start tag whose tag name is "li", "dd" or "dt" */ |
||
| 1714 | case 'li': case 'dd': case 'dt': |
||
| 1715 | /* If the stack of open elements has a p element in scope, |
||
| 1716 | then act as if an end tag with the tag name p had been |
||
| 1717 | seen. */ |
||
| 1718 | if($this->elementInScope('p')) { |
||
| 1719 | $this->emitToken(array( |
||
| 1720 | 'name' => 'p', |
||
| 1721 | 'type' => HTML5::ENDTAG |
||
| 1722 | )); |
||
| 1723 | } |
||
| 1724 | |||
| 1725 | $stack_length = count($this->stack) - 1; |
||
| 1726 | |||
| 1727 | for($n = $stack_length; 0 <= $n; $n--) { |
||
| 1728 | /* 1. Initialise node to be the current node (the |
||
| 1729 | bottommost node of the stack). */ |
||
| 1730 | $stop = false; |
||
| 1731 | $node = $this->stack[$n]; |
||
| 1732 | $cat = $this->getElementCategory($node->tagName); |
||
| 1733 | |||
| 1734 | /* 2. If node is an li, dd or dt element, then pop all |
||
| 1735 | the nodes from the current node up to node, including |
||
| 1736 | node, then stop this algorithm. */ |
||
| 1737 | if($token['name'] === $node->tagName || ($token['name'] !== 'li' |
||
| 1738 | && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { |
||
| 1739 | for($x = $stack_length; $x >= $n ; $x--) { |
||
| 1740 | array_pop($this->stack); |
||
| 1741 | } |
||
| 1742 | |||
| 1743 | break; |
||
| 1744 | } |
||
| 1745 | |||
| 1746 | /* 3. If node is not in the formatting category, and is |
||
| 1747 | not in the phrasing category, and is not an address or |
||
| 1748 | div element, then stop this algorithm. */ |
||
| 1749 | if($cat !== self::FORMATTING && $cat !== self::PHRASING && |
||
| 1750 | $node->tagName !== 'address' && $node->tagName !== 'div') { |
||
| 1751 | break; |
||
| 1752 | } |
||
| 1753 | } |
||
| 1754 | |||
| 1755 | /* Finally, insert an HTML element with the same tag |
||
| 1756 | name as the token's. */ |
||
| 1757 | $this->insertElement($token); |
||
| 1758 | break; |
||
| 1759 | |||
| 1760 | /* A start tag token whose tag name is "plaintext" */ |
||
| 1761 | case 'plaintext': |
||
| 1762 | /* If the stack of open elements has a p element in scope, |
||
| 1763 | then act as if an end tag with the tag name p had been |
||
| 1764 | seen. */ |
||
| 1765 | if($this->elementInScope('p')) { |
||
| 1766 | $this->emitToken(array( |
||
| 1767 | 'name' => 'p', |
||
| 1768 | 'type' => HTML5::ENDTAG |
||
| 1769 | )); |
||
| 1770 | } |
||
| 1771 | |||
| 1772 | /* Insert an HTML element for the token. */ |
||
| 1773 | $this->insertElement($token); |
||
| 1774 | |||
| 1775 | return HTML5::PLAINTEXT; |
||
| 1776 | break; |
||
| 1777 | |||
| 1778 | /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", |
||
| 1779 | "h5", "h6" */ |
||
| 1780 | case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': |
||
| 1781 | /* If the stack of open elements has a p element in scope, |
||
| 1782 | then act as if an end tag with the tag name p had been seen. */ |
||
| 1783 | if($this->elementInScope('p')) { |
||
| 1784 | $this->emitToken(array( |
||
| 1785 | 'name' => 'p', |
||
| 1786 | 'type' => HTML5::ENDTAG |
||
| 1787 | )); |
||
| 1788 | } |
||
| 1789 | |||
| 1790 | /* If the stack of open elements has in scope an element whose |
||
| 1791 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then |
||
| 1792 | this is a parse error; pop elements from the stack until an |
||
| 1793 | element with one of those tag names has been popped from the |
||
| 1794 | stack. */ |
||
| 1795 | while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { |
||
| 1796 | array_pop($this->stack); |
||
| 1797 | } |
||
| 1798 | |||
| 1799 | /* Insert an HTML element for the token. */ |
||
| 1800 | $this->insertElement($token); |
||
| 1801 | break; |
||
| 1802 | |||
| 1803 | /* A start tag whose tag name is "a" */ |
||
| 1804 | case 'a': |
||
| 1805 | /* If the list of active formatting elements contains |
||
| 1806 | an element whose tag name is "a" between the end of the |
||
| 1807 | list and the last marker on the list (or the start of |
||
| 1808 | the list if there is no marker on the list), then this |
||
| 1809 | is a parse error; act as if an end tag with the tag name |
||
| 1810 | "a" had been seen, then remove that element from the list |
||
| 1811 | of active formatting elements and the stack of open |
||
| 1812 | elements if the end tag didn't already remove it (it |
||
| 1813 | might not have if the element is not in table scope). */ |
||
| 1814 | $leng = count($this->a_formatting); |
||
| 1815 | |||
| 1816 | for($n = $leng - 1; $n >= 0; $n--) { |
||
| 1817 | if($this->a_formatting[$n] === self::MARKER) { |
||
| 1818 | break; |
||
| 1819 | |||
| 1820 | } elseif($this->a_formatting[$n]->nodeName === 'a') { |
||
| 1821 | $this->emitToken(array( |
||
| 1822 | 'name' => 'a', |
||
| 1823 | 'type' => HTML5::ENDTAG |
||
| 1824 | )); |
||
| 1825 | break; |
||
| 1826 | } |
||
| 1827 | } |
||
| 1828 | |||
| 1829 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1830 | $this->reconstructActiveFormattingElements(); |
||
| 1831 | |||
| 1832 | /* Insert an HTML element for the token. */ |
||
| 1833 | $el = $this->insertElement($token); |
||
| 1834 | |||
| 1835 | /* Add that element to the list of active formatting |
||
| 1836 | elements. */ |
||
| 1837 | $this->a_formatting[] = $el; |
||
| 1838 | break; |
||
| 1839 | |||
| 1840 | /* A start tag whose tag name is one of: "b", "big", "em", "font", |
||
| 1841 | "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ |
||
| 1842 | case 'b': case 'big': case 'em': case 'font': case 'i': |
||
| 1843 | case 'nobr': case 's': case 'small': case 'strike': |
||
| 1844 | case 'strong': case 'tt': case 'u': |
||
| 1845 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1846 | $this->reconstructActiveFormattingElements(); |
||
| 1847 | |||
| 1848 | /* Insert an HTML element for the token. */ |
||
| 1849 | $el = $this->insertElement($token); |
||
| 1850 | |||
| 1851 | /* Add that element to the list of active formatting |
||
| 1852 | elements. */ |
||
| 1853 | $this->a_formatting[] = $el; |
||
| 1854 | break; |
||
| 1855 | |||
| 1856 | /* A start tag token whose tag name is "button" */ |
||
| 1857 | case 'button': |
||
| 1858 | /* If the stack of open elements has a button element in scope, |
||
| 1859 | then this is a parse error; act as if an end tag with the tag |
||
| 1860 | name "button" had been seen, then reprocess the token. (We don't |
||
| 1861 | do that. Unnecessary.) */ |
||
| 1862 | if($this->elementInScope('button')) { |
||
| 1863 | $this->inBody(array( |
||
| 1864 | 'name' => 'button', |
||
| 1865 | 'type' => HTML5::ENDTAG |
||
| 1866 | )); |
||
| 1867 | } |
||
| 1868 | |||
| 1869 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1870 | $this->reconstructActiveFormattingElements(); |
||
| 1871 | |||
| 1872 | /* Insert an HTML element for the token. */ |
||
| 1873 | $this->insertElement($token); |
||
| 1874 | |||
| 1875 | /* Insert a marker at the end of the list of active |
||
| 1876 | formatting elements. */ |
||
| 1877 | $this->a_formatting[] = self::MARKER; |
||
| 1878 | break; |
||
| 1879 | |||
| 1880 | /* A start tag token whose tag name is one of: "marquee", "object" */ |
||
| 1881 | case 'marquee': case 'object': |
||
| 1882 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1883 | $this->reconstructActiveFormattingElements(); |
||
| 1884 | |||
| 1885 | /* Insert an HTML element for the token. */ |
||
| 1886 | $this->insertElement($token); |
||
| 1887 | |||
| 1888 | /* Insert a marker at the end of the list of active |
||
| 1889 | formatting elements. */ |
||
| 1890 | $this->a_formatting[] = self::MARKER; |
||
| 1891 | break; |
||
| 1892 | |||
| 1893 | /* A start tag token whose tag name is "xmp" */ |
||
| 1894 | case 'xmp': |
||
| 1895 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1896 | $this->reconstructActiveFormattingElements(); |
||
| 1897 | |||
| 1898 | /* Insert an HTML element for the token. */ |
||
| 1899 | $this->insertElement($token); |
||
| 1900 | |||
| 1901 | /* Switch the content model flag to the CDATA state. */ |
||
| 1902 | return HTML5::CDATA; |
||
| 1903 | break; |
||
| 1904 | |||
| 1905 | /* A start tag whose tag name is "table" */ |
||
| 1906 | case 'table': |
||
| 1907 | /* If the stack of open elements has a p element in scope, |
||
| 1908 | then act as if an end tag with the tag name p had been seen. */ |
||
| 1909 | if($this->elementInScope('p')) { |
||
| 1910 | $this->emitToken(array( |
||
| 1911 | 'name' => 'p', |
||
| 1912 | 'type' => HTML5::ENDTAG |
||
| 1913 | )); |
||
| 1914 | } |
||
| 1915 | |||
| 1916 | /* Insert an HTML element for the token. */ |
||
| 1917 | $this->insertElement($token); |
||
| 1918 | |||
| 1919 | /* Change the insertion mode to "in table". */ |
||
| 1920 | $this->mode = self::IN_TABLE; |
||
| 1921 | break; |
||
| 1922 | |||
| 1923 | /* A start tag whose tag name is one of: "area", "basefont", |
||
| 1924 | "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ |
||
| 1925 | case 'area': case 'basefont': case 'bgsound': case 'br': |
||
| 1926 | case 'embed': case 'img': case 'param': case 'spacer': |
||
| 1927 | case 'wbr': |
||
| 1928 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1929 | $this->reconstructActiveFormattingElements(); |
||
| 1930 | |||
| 1931 | /* Insert an HTML element for the token. */ |
||
| 1932 | $this->insertElement($token); |
||
| 1933 | |||
| 1934 | /* Immediately pop the current node off the stack of open elements. */ |
||
| 1935 | array_pop($this->stack); |
||
| 1936 | break; |
||
| 1937 | |||
| 1938 | /* A start tag whose tag name is "hr" */ |
||
| 1939 | case 'hr': |
||
| 1940 | /* If the stack of open elements has a p element in scope, |
||
| 1941 | then act as if an end tag with the tag name p had been seen. */ |
||
| 1942 | if($this->elementInScope('p')) { |
||
| 1943 | $this->emitToken(array( |
||
| 1944 | 'name' => 'p', |
||
| 1945 | 'type' => HTML5::ENDTAG |
||
| 1946 | )); |
||
| 1947 | } |
||
| 1948 | |||
| 1949 | /* Insert an HTML element for the token. */ |
||
| 1950 | $this->insertElement($token); |
||
| 1951 | |||
| 1952 | /* Immediately pop the current node off the stack of open elements. */ |
||
| 1953 | array_pop($this->stack); |
||
| 1954 | break; |
||
| 1955 | |||
| 1956 | /* A start tag whose tag name is "image" */ |
||
| 1957 | case 'image': |
||
| 1958 | /* Parse error. Change the token's tag name to "img" and |
||
| 1959 | reprocess it. (Don't ask.) */ |
||
| 1960 | $token['name'] = 'img'; |
||
| 1961 | return $this->inBody($token); |
||
| 1962 | break; |
||
| 1963 | |||
| 1964 | /* A start tag whose tag name is "input" */ |
||
| 1965 | case 'input': |
||
| 1966 | /* Reconstruct the active formatting elements, if any. */ |
||
| 1967 | $this->reconstructActiveFormattingElements(); |
||
| 1968 | |||
| 1969 | /* Insert an input element for the token. */ |
||
| 1970 | $element = $this->insertElement($token, false); |
||
| 1971 | |||
| 1972 | /* If the form element pointer is not null, then associate the |
||
| 1973 | input element with the form element pointed to by the form |
||
| 1974 | element pointer. */ |
||
| 1975 | $this->form_pointer !== null |
||
| 1976 | ? $this->form_pointer->appendChild($element) |
||
| 1977 | : end($this->stack)->appendChild($element); |
||
| 1978 | |||
| 1979 | /* Pop that input element off the stack of open elements. */ |
||
| 1980 | array_pop($this->stack); |
||
| 1981 | break; |
||
| 1982 | |||
| 1983 | /* A start tag whose tag name is "isindex" */ |
||
| 1984 | case 'isindex': |
||
| 1985 | /* Parse error. */ |
||
| 1986 | // w/e |
||
| 1987 | |||
| 1988 | /* If the form element pointer is not null, |
||
| 1989 | then ignore the token. */ |
||
| 1990 | if($this->form_pointer === null) { |
||
| 1991 | /* Act as if a start tag token with the tag name "form" had |
||
| 1992 | been seen. NOTE(review): the token built below is named 'body', not "form" - confirm which is intended. */ |
||
| 1993 | $this->inBody(array( |
||
| 1994 | 'name' => 'body', |
||
| 1995 | 'type' => HTML5::STARTTAG, |
||
| 1996 | 'attr' => array() |
||
| 1997 | )); |
||
| 1998 | |||
| 1999 | /* Act as if a start tag token with the tag name "hr" had |
||
| 2000 | been seen. */ |
||
| 2001 | $this->inBody(array( |
||
| 2002 | 'name' => 'hr', |
||
| 2003 | 'type' => HTML5::STARTTAG, |
||
| 2004 | 'attr' => array() |
||
| 2005 | )); |
||
| 2006 | |||
| 2007 | /* Act as if a start tag token with the tag name "p" had |
||
| 2008 | been seen. */ |
||
| 2009 | $this->inBody(array( |
||
| 2010 | 'name' => 'p', |
||
| 2011 | 'type' => HTML5::STARTTAG, |
||
| 2012 | 'attr' => array() |
||
| 2013 | )); |
||
| 2014 | |||
| 2015 | /* Act as if a start tag token with the tag name "label" |
||
| 2016 | had been seen. */ |
||
| 2017 | $this->inBody(array( |
||
| 2018 | 'name' => 'label', |
||
| 2019 | 'type' => HTML5::STARTTAG, |
||
| 2020 | 'attr' => array() |
||
| 2021 | )); |
||
| 2022 | |||
| 2023 | /* Act as if a stream of character tokens had been seen. */ |
||
| 2024 | $this->insertText('This is a searchable index. '. |
||
| 2025 | 'Insert your search keywords here: '); |
||
| 2026 | |||
| 2027 | /* Act as if a start tag token with the tag name "input" |
||
| 2028 | had been seen, with all the attributes from the "isindex" |
||
| 2029 | token, except with the "name" attribute set to the value |
||
| 2030 | "isindex" (ignoring any explicit "name" attribute). */ |
||
| 2031 | $attr = $token['attr']; |
||
| 2032 | $attr[] = array('name' => 'name', 'value' => 'isindex'); |
||
| 2033 | |||
| 2034 | $this->inBody(array( |
||
| 2035 | 'name' => 'input', |
||
| 2036 | 'type' => HTML5::STARTTAG, |
||
| 2037 | 'attr' => $attr |
||
| 2038 | )); |
||
| 2039 | |||
| 2040 | /* Act as if a stream of character tokens had been seen |
||
| 2041 | (see below for what they should say). */ |
||
| 2042 | $this->insertText('This is a searchable index. '. |
||
| 2043 | 'Insert your search keywords here: '); |
||
| 2044 | |||
| 2045 | /* Act as if an end tag token with the tag name "label" |
||
| 2046 | had been seen. */ |
||
| 2047 | $this->inBody(array( |
||
| 2048 | 'name' => 'label', |
||
| 2049 | 'type' => HTML5::ENDTAG |
||
| 2050 | )); |
||
| 2051 | |||
| 2052 | /* Act as if an end tag token with the tag name "p" had |
||
| 2053 | been seen. */ |
||
| 2054 | $this->inBody(array( |
||
| 2055 | 'name' => 'p', |
||
| 2056 | 'type' => HTML5::ENDTAG |
||
| 2057 | )); |
||
| 2058 | |||
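| 2059 | /* Act as if a start tag token with the tag name "hr" had |
||
| 2060 | been seen. NOTE(review): the token built below has type HTML5::ENDTAG, not a start tag - confirm which is intended. */ |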
||
| 2061 | $this->inBody(array( |
||
| 2062 | 'name' => 'hr', |
||
| 2063 | 'type' => HTML5::ENDTAG |
||
| 2064 | )); |
||
| 2065 | |||
| 2066 | /* Act as if an end tag token with the tag name "form" had |
||
| 2067 | been seen. */ |
||
| 2068 | $this->inBody(array( |
||
| 2069 | 'name' => 'form', |
||
| 2070 | 'type' => HTML5::ENDTAG |
||
| 2071 | )); |
||
| 2072 | } |
||
| 2073 | break; |
||
| 2074 | |||
| 2075 | /* A start tag whose tag name is "textarea" */ |
||
| 2076 | case 'textarea': |
||
| 2077 | $this->insertElement($token); |
||
| 2078 | |||
| 2079 | /* Switch the tokeniser's content model flag to the |
||
| 2080 | RCDATA state. */ |
||
| 2081 | return HTML5::RCDATA; |
||
| 2082 | break; |
||
| 2083 | |||
| 2084 | /* A start tag whose tag name is one of: "iframe", "noembed", |
||
| 2085 | "noframes" */ |
||
| 2086 | case 'iframe': case 'noembed': case 'noframes': |
||
| 2087 | $this->insertElement($token); |
||
| 2088 | |||
| 2089 | /* Switch the tokeniser's content model flag to the CDATA state. */ |
||
| 2090 | return HTML5::CDATA; |
||
| 2091 | break; |
||
| 2092 | |||
| 2093 | /* A start tag whose tag name is "select" */ |
||
| 2094 | case 'select': |
||
| 2095 | /* Reconstruct the active formatting elements, if any. */ |
||
| 2096 | $this->reconstructActiveFormattingElements(); |
||
| 2097 | |||
| 2098 | /* Insert an HTML element for the token. */ |
||
| 2099 | $this->insertElement($token); |
||
| 2100 | |||
| 2101 | /* Change the insertion mode to "in select". */ |
||
| 2102 | $this->mode = self::IN_SELECT; |
||
| 2103 | break; |
||
| 2104 | |||
| 2105 | /* A start or end tag whose tag name is one of: "caption", "col", |
||
| 2106 | "colgroup", "frame", "frameset", "head", "option", "optgroup", |
||
| 2107 | "tbody", "td", "tfoot", "th", "thead", "tr". */ |
||
| 2108 | case 'caption': case 'col': case 'colgroup': case 'frame': |
||
| 2109 | case 'frameset': case 'head': case 'option': case 'optgroup': |
||
| 2110 | case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': |
||
| 2111 | case 'tr': |
||
| 2112 | // Parse error. Ignore the token. |
||
| 2113 | break; |
||
| 2114 | |||
| 2115 | /* A start or end tag whose tag name is one of: "event-source", |
||
| 2116 | "section", "nav", "article", "aside", "header", "footer", |
||
| 2117 | "datagrid", "command" */ |
||
| 2118 | case 'event-source': case 'section': case 'nav': case 'article': |
||
| 2119 | case 'aside': case 'header': case 'footer': case 'datagrid': |
||
| 2120 | case 'command': |
||
| 2121 | // Work in progress! |
||
| 2122 | break; |
||
| 2123 | |||
| 2124 | /* A start tag token not covered by the previous entries */ |
||
| 2125 | default: |
||
| 2126 | /* Reconstruct the active formatting elements, if any. */ |
||
| 2127 | $this->reconstructActiveFormattingElements(); |
||
| 2128 | |||
| 2129 | $this->insertElement($token); |
||
| 2130 | break; |
||
| 2131 | } |
||
| 2132 | break; |
||
| 2133 | |||
| 2134 | case HTML5::ENDTAG: |
||
| 2135 | switch($token['name']) { |
||
| 2136 | /* An end tag with the tag name "body" */ |
||
| 2137 | case 'body': |
||
| 2138 | /* If the second element in the stack of open elements is |
||
| 2139 | not a body element, this is a parse error. Ignore the token. |
||
| 2140 | (innerHTML case) */ |
||
| 2141 | if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { |
||
| 2142 | // Ignore. |
||
| 2143 | |||
| 2144 | /* If the current node is not the body element, then this |
||
| 2145 | is a parse error. */ |
||
| 2146 | } elseif(end($this->stack)->nodeName !== 'body') { |
||
| 2147 | // Parse error. |
||
| 2148 | } |
||
| 2149 | |||
| 2150 | /* Change the insertion mode to "after body". */ |
||
| 2151 | $this->mode = self::AFTER_BODY; |
||
| 2152 | break; |
||
| 2153 | |||
| 2154 | /* An end tag with the tag name "html" */ |
||
| 2155 | case 'html': |
||
| 2156 | /* Act as if an end tag with tag name "body" had been seen, |
||
| 2157 | then, if that token wasn't ignored, reprocess the current |
||
| 2158 | token. */ |
||
| 2159 | $this->inBody(array( |
||
| 2160 | 'name' => 'body', |
||
| 2161 | 'type' => HTML5::ENDTAG |
||
| 2162 | )); |
||
| 2163 | |||
| 2164 | return $this->afterBody($token); |
||
| 2165 | break; |
||
| 2166 | |||
| 2167 | /* An end tag whose tag name is one of: "address", "blockquote", |
||
| 2168 | "center", "dir", "div", "dl", "fieldset", "listing", "menu", |
||
| 2169 | "ol", "pre", "ul" */ |
||
| 2170 | case 'address': case 'blockquote': case 'center': case 'dir': |
||
| 2171 | case 'div': case 'dl': case 'fieldset': case 'listing': |
||
| 2172 | case 'menu': case 'ol': case 'pre': case 'ul': |
||
| 2173 | /* If the stack of open elements has an element in scope |
||
| 2174 | with the same tag name as that of the token, then generate |
||
| 2175 | implied end tags. */ |
||
| 2176 | if($this->elementInScope($token['name'])) { |
||
| 2177 | $this->generateImpliedEndTags(); |
||
| 2178 | |||
| 2179 | /* Now, if the current node is not an element with |
||
| 2180 | the same tag name as that of the token, then this |
||
| 2181 | is a parse error. */ |
||
| 2182 | // w/e |
||
| 2183 | |||
| 2184 | /* If the stack of open elements has an element in |
||
| 2185 | scope with the same tag name as that of the token, |
||
| 2186 | then pop elements from this stack until an element |
||
| 2187 | with that tag name has been popped from the stack. */ |
||
| 2188 | for($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
| 2189 | if($this->stack[$n]->nodeName === $token['name']) { |
||
| 2190 | $n = -1; |
||
| 2191 | } |
||
| 2192 | |||
| 2193 | array_pop($this->stack); |
||
| 2194 | } |
||
| 2195 | } |
||
| 2196 | break; |
||
| 2197 | |||
| 2198 | /* An end tag whose tag name is "form" */ |
||
| 2199 | case 'form': |
||
| 2200 | /* If the stack of open elements has an element in scope |
||
| 2201 | with the same tag name as that of the token, then generate |
||
| 2202 | implied end tags. */ |
||
| 2203 | if($this->elementInScope($token['name'])) { |
||
| 2204 | $this->generateImpliedEndTags(); |
||
| 2205 | |||
| 2206 | } |
||
| 2207 | |||
| 2208 | if(end($this->stack)->nodeName !== $token['name']) { |
||
| 2209 | /* Now, if the current node is not an element with the |
||
| 2210 | same tag name as that of the token, then this is a parse |
||
| 2211 | error. */ |
||
| 2212 | // w/e |
||
| 2213 | |||
| 2214 | } else { |
||
| 2215 | /* Otherwise, if the current node is an element with |
||
| 2216 | the same tag name as that of the token pop that element |
||
| 2217 | from the stack. */ |
||
| 2218 | array_pop($this->stack); |
||
| 2219 | } |
||
| 2220 | |||
| 2221 | /* In any case, set the form element pointer to null. */ |
||
| 2222 | $this->form_pointer = null; |
||
| 2223 | break; |
||
| 2224 | |||
| 2225 | /* An end tag whose tag name is "p" */ |
||
| 2226 | case 'p': |
||
| 2227 | /* If the stack of open elements has a p element in scope, |
||
| 2228 | then generate implied end tags, except for p elements. */ |
||
| 2229 | if($this->elementInScope('p')) { |
||
| 2230 | $this->generateImpliedEndTags(array('p')); |
||
| 2231 | |||
| 2232 | /* If the current node is not a p element, then this is |
||
| 2233 | a parse error. */ |
||
| 2234 | // k |
||
| 2235 | |||
| 2236 | /* If the stack of open elements has a p element in |
||
| 2237 | scope, then pop elements from this stack until the stack |
||
| 2238 | no longer has a p element in scope. */ |
||
| 2239 | for($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
| 2240 | if($this->elementInScope('p')) { |
||
| 2241 | array_pop($this->stack); |
||
| 2242 | |||
| 2243 | } else { |
||
| 2244 | break; |
||
| 2245 | } |
||
| 2246 | } |
||
| 2247 | } |
||
| 2248 | break; |
||
| 2249 | |||
| 2250 | /* An end tag whose tag name is "dd", "dt", or "li" */ |
||
| 2251 | case 'dd': case 'dt': case 'li': |
||
| 2252 | /* If the stack of open elements has an element in scope |
||
| 2253 | whose tag name matches the tag name of the token, then |
||
| 2254 | generate implied end tags, except for elements with the |
||
| 2255 | same tag name as the token. */ |
||
| 2256 | if($this->elementInScope($token['name'])) { |
||
| 2257 | $this->generateImpliedEndTags(array($token['name'])); |
||
| 2258 | |||
| 2259 | /* If the current node is not an element with the same |
||
| 2260 | tag name as the token, then this is a parse error. */ |
||
| 2261 | // w/e |
||
| 2262 | |||
| 2263 | /* If the stack of open elements has an element in scope |
||
| 2264 | whose tag name matches the tag name of the token, then |
||
| 2265 | pop elements from this stack until an element with that |
||
| 2266 | tag name has been popped from the stack. */ |
||
| 2267 | for($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
| 2268 | if($this->stack[$n]->nodeName === $token['name']) { |
||
| 2269 | $n = -1; |
||
| 2270 | } |
||
| 2271 | |||
| 2272 | array_pop($this->stack); |
||
| 2273 | } |
||
| 2274 | } |
||
| 2275 | break; |
||
| 2276 | |||
| 2277 | /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", |
||
| 2278 | "h5", "h6" */ |
||
| 2279 | case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': |
||
| 2280 | $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); |
||
| 2281 | |||
| 2282 | /* If the stack of open elements has in scope an element whose |
||
| 2283 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then |
||
| 2284 | generate implied end tags. */ |
||
| 2285 | if($this->elementInScope($elements)) { |
||
| 2286 | $this->generateImpliedEndTags(); |
||
| 2287 | |||
| 2288 | /* Now, if the current node is not an element with the same |
||
| 2289 | tag name as that of the token, then this is a parse error. */ |
||
| 2290 | // w/e |
||
| 2291 | |||
| 2292 | /* If the stack of open elements has in scope an element |
||
| 2293 | whose tag name is one of "h1", "h2", "h3", "h4", "h5", or |
||
| 2294 | "h6", then pop elements from the stack until an element |
||
| 2295 | with one of those tag names has been popped from the stack. */ |
||
| 2296 | while($this->elementInScope($elements)) { |
||
| 2297 | array_pop($this->stack); |
||
| 2298 | } |
||
| 2299 | } |
||
| 2300 | break; |
||
| 2301 | |||
| 2302 | /* An end tag whose tag name is one of: "a", "b", "big", "em", |
||
| 2303 | "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ |
||
| 2304 | case 'a': case 'b': case 'big': case 'em': case 'font': |
||
| 2305 | case 'i': case 'nobr': case 's': case 'small': case 'strike': |
||
| 2306 | case 'strong': case 'tt': case 'u': |
||
| 2307 | /* 1. Let the formatting element be the last element in |
||
| 2308 | the list of active formatting elements that: |
||
| 2309 | * is between the end of the list and the last scope |
||
| 2310 | marker in the list, if any, or the start of the list |
||
| 2311 | otherwise, and |
||
| 2312 | * has the same tag name as the token. |
||
| 2313 | */ |
||
| 2314 | while(true) { |
||
| 2315 | for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { |
||
| 2316 | if($this->a_formatting[$a] === self::MARKER) { |
||
| 2317 | break; |
||
| 2318 | |||
| 2319 | } elseif($this->a_formatting[$a]->tagName === $token['name']) { |
||
| 2320 | $formatting_element = $this->a_formatting[$a]; |
||
| 2321 | $in_stack = in_array($formatting_element, $this->stack, true); |
||
| 2322 | $fe_af_pos = $a; |
||
| 2323 | break; |
||
| 2324 | } |
||
| 2325 | } |
||
| 2326 | |||
| 2327 | /* If there is no such node, or, if that node is |
||
| 2328 | also in the stack of open elements but the element |
||
| 2329 | is not in scope, then this is a parse error. Abort |
||
| 2330 | these steps. The token is ignored. */ |
||
| 2331 | if(!isset($formatting_element) || ($in_stack && |
||
| 2332 | !$this->elementInScope($token['name']))) { |
||
| 2333 | break; |
||
| 2334 | |||
| 2335 | /* Otherwise, if there is such a node, but that node |
||
| 2336 | is not in the stack of open elements, then this is a |
||
| 2337 | parse error; remove the element from the list, and |
||
| 2338 | abort these steps. */ |
||
| 2339 | } elseif(isset($formatting_element) && !$in_stack) { |
||
| 2340 | unset($this->a_formatting[$fe_af_pos]); |
||
| 2341 | $this->a_formatting = array_merge($this->a_formatting); |
||
| 2342 | break; |
||
| 2343 | } |
||
| 2344 | |||
| 2345 | /* 2. Let the furthest block be the topmost node in the |
||
| 2346 | stack of open elements that is lower in the stack |
||
| 2347 | than the formatting element, and is not an element in |
||
| 2348 | the phrasing or formatting categories. There might |
||
| 2349 | not be one. */ |
||
| 2350 | $fe_s_pos = array_search($formatting_element, $this->stack, true); |
||
| 2351 | $length = count($this->stack); |
||
| 2352 | |||
| 2353 | for($s = $fe_s_pos + 1; $s < $length; $s++) { |
||
| 2354 | $category = $this->getElementCategory($this->stack[$s]->nodeName); |
||
| 2355 | |||
| 2356 | if($category !== self::PHRASING && $category !== self::FORMATTING) { |
||
| 2357 | $furthest_block = $this->stack[$s]; |
||
| 2358 | } |
||
| 2359 | } |
||
| 2360 | |||
| 2361 | /* 3. If there is no furthest block, then the UA must |
||
| 2362 | skip the subsequent steps and instead just pop all |
||
| 2363 | the nodes from the bottom of the stack of open |
||
| 2364 | elements, from the current node up to the formatting |
||
| 2365 | element, and remove the formatting element from the |
||
| 2366 | list of active formatting elements. */ |
||
| 2367 | if(!isset($furthest_block)) { |
||
| 2368 | for($n = $length - 1; $n >= $fe_s_pos; $n--) { |
||
| 2369 | array_pop($this->stack); |
||
| 2370 | } |
||
| 2371 | |||
| 2372 | unset($this->a_formatting[$fe_af_pos]); |
||
| 2373 | $this->a_formatting = array_merge($this->a_formatting); |
||
| 2374 | break; |
||
| 2375 | } |
||
| 2376 | |||
| 2377 | /* 4. Let the common ancestor be the element |
||
| 2378 | immediately above the formatting element in the stack |
||
| 2379 | of open elements. */ |
||
| 2380 | $common_ancestor = $this->stack[$fe_s_pos - 1]; |
||
| 2381 | |||
| 2382 | /* 5. If the furthest block has a parent node, then |
||
| 2383 | remove the furthest block from its parent node. */ |
||
| 2384 | if($furthest_block->parentNode !== null) { |
||
| 2385 | $furthest_block->parentNode->removeChild($furthest_block); |
||
| 2386 | } |
||
| 2387 | |||
| 2388 | /* 6. Let a bookmark note the position of the |
||
| 2389 | formatting element in the list of active formatting |
||
| 2390 | elements relative to the elements on either side |
||
| 2391 | of it in the list. */ |
||
| 2392 | $bookmark = $fe_af_pos; |
||
| 2393 | |||
| 2394 | /* 7. Let node and last node be the furthest block. |
||
| 2395 | Follow these steps: */ |
||
| 2396 | $node = $furthest_block; |
||
| 2397 | $last_node = $furthest_block; |
||
| 2398 | |||
| 2399 | while(true) { |
||
| 2400 | for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { |
||
| 2401 | /* 7.1 Let node be the element immediately |
||
| 2402 | prior to node in the stack of open elements. */ |
||
| 2403 | $node = $this->stack[$n]; |
||
| 2404 | |||
| 2405 | /* 7.2 If node is not in the list of active |
||
| 2406 | formatting elements, then remove node from |
||
| 2407 | the stack of open elements and then go back |
||
| 2408 | to step 1. */ |
||
| 2409 | if(!in_array($node, $this->a_formatting, true)) { |
||
| 2410 | unset($this->stack[$n]); |
||
| 2411 | $this->stack = array_merge($this->stack); |
||
| 2412 | |||
| 2413 | } else { |
||
| 2414 | break; |
||
| 2415 | } |
||
| 2416 | } |
||
| 2417 | |||
| 2418 | /* 7.3 Otherwise, if node is the formatting |
||
| 2419 | element, then go to the next step in the overall |
||
| 2420 | algorithm. */ |
||
| 2421 | if($node === $formatting_element) { |
||
| 2422 | break; |
||
| 2423 | |||
| 2424 | /* 7.4 Otherwise, if last node is the furthest |
||
| 2425 | block, then move the aforementioned bookmark to |
||
| 2426 | be immediately after the node in the list of |
||
| 2427 | active formatting elements. */ |
||
| 2428 | } elseif($last_node === $furthest_block) { |
||
| 2429 | $bookmark = array_search($node, $this->a_formatting, true) + 1; |
||
| 2430 | } |
||
| 2431 | |||
| 2432 | /* 7.5 If node has any children, perform a |
||
| 2433 | shallow clone of node, replace the entry for |
||
| 2434 | node in the list of active formatting elements |
||
| 2435 | with an entry for the clone, replace the entry |
||
| 2436 | for node in the stack of open elements with an |
||
| 2437 | entry for the clone, and let node be the clone. */ |
||
| 2438 | if($node->hasChildNodes()) { |
||
| 2439 | $clone = $node->cloneNode(); |
||
| 2440 | $s_pos = array_search($node, $this->stack, true); |
||
| 2441 | $a_pos = array_search($node, $this->a_formatting, true); |
||
| 2442 | |||
| 2443 | $this->stack[$s_pos] = $clone; |
||
| 2444 | $this->a_formatting[$a_pos] = $clone; |
||
| 2445 | $node = $clone; |
||
| 2446 | } |
||
| 2447 | |||
| 2448 | /* 7.6 Insert last node into node, first removing |
||
| 2449 | it from its previous parent node if any. */ |
||
| 2450 | if($last_node->parentNode !== null) { |
||
| 2451 | $last_node->parentNode->removeChild($last_node); |
||
| 2452 | } |
||
| 2453 | |||
| 2454 | $node->appendChild($last_node); |
||
| 2455 | |||
| 2456 | /* 7.7 Let last node be node. */ |
||
| 2457 | $last_node = $node; |
||
| 2458 | } |
||
| 2459 | |||
| 2460 | /* 8. Insert whatever last node ended up being in |
||
| 2461 | the previous step into the common ancestor node, |
||
| 2462 | first removing it from its previous parent node if |
||
| 2463 | any. */ |
||
| 2464 | if($last_node->parentNode !== null) { |
||
| 2465 | $last_node->parentNode->removeChild($last_node); |
||
| 2466 | } |
||
| 2467 | |||
| 2468 | $common_ancestor->appendChild($last_node); |
||
| 2469 | |||
| 2470 | /* 9. Perform a shallow clone of the formatting |
||
| 2471 | element. */ |
||
| 2472 | $clone = $formatting_element->cloneNode(); |
||
| 2473 | |||
| 2474 | /* 10. Take all of the child nodes of the furthest |
||
| 2475 | block and append them to the clone created in the |
||
| 2476 | last step. */ |
||
| 2477 | while($furthest_block->hasChildNodes()) { |
||
| 2478 | $child = $furthest_block->firstChild; |
||
| 2479 | $furthest_block->removeChild($child); |
||
| 2480 | $clone->appendChild($child); |
||
| 2481 | } |
||
| 2482 | |||
| 2483 | /* 11. Append that clone to the furthest block. */ |
||
| 2484 | $furthest_block->appendChild($clone); |
||
| 2485 | |||
| 2486 | /* 12. Remove the formatting element from the list |
||
| 2487 | of active formatting elements, and insert the clone |
||
| 2488 | into the list of active formatting elements at the |
||
| 2489 | position of the aforementioned bookmark. */ |
||
| 2490 | $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); |
||
| 2491 | unset($this->a_formatting[$fe_af_pos]); |
||
| 2492 | $this->a_formatting = array_merge($this->a_formatting); |
||
| 2493 | |||
| 2494 | $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); |
||
| 2495 | $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); |
||
| 2496 | $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); |
||
| 2497 | |||
| 2498 | /* 13. Remove the formatting element from the stack |
||
| 2499 | of open elements, and insert the clone into the stack |
||
| 2500 | of open elements immediately after (i.e. in a more |
||
| 2501 | deeply nested position than) the position of the |
||
| 2502 | furthest block in that stack. */ |
||
| 2503 | $fe_s_pos = array_search($formatting_element, $this->stack, true); |
||
| 2504 | $fb_s_pos = array_search($furthest_block, $this->stack, true); |
||
| 2505 | unset($this->stack[$fe_s_pos]); |
||
| 2506 | |||
| 2507 | $s_part1 = array_slice($this->stack, 0, $fb_s_pos); |
||
| 2508 | $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); |
||
| 2509 | $this->stack = array_merge($s_part1, array($clone), $s_part2); |
||
| 2510 | |||
| 2511 | /* 14. Jump back to step 1 in this series of steps. */ |
||
| 2512 | unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); |
||
| 2513 | } |
||
| 2514 | break; |
||
| 2515 | |||
| 2516 | /* An end tag token whose tag name is one of: "button", |
||
| 2517 | "marquee", "object" */ |
||
| 2518 | case 'button': case 'marquee': case 'object': |
||
| 2519 | /* If the stack of open elements has an element in scope whose |
||
| 2520 | tag name matches the tag name of the token, then generate implied |
||
| 2521 | end tags. */ |
||
| 2522 | if($this->elementInScope($token['name'])) { |
||
| 2523 | $this->generateImpliedEndTags(); |
||
| 2524 | |||
| 2525 | /* Now, if the current node is not an element with the same |
||
| 2526 | tag name as the token, then this is a parse error. */ |
||
| 2527 | // k |
||
| 2528 | |||
| 2529 | /* Now, if the stack of open elements has an element in scope |
||
| 2530 | whose tag name matches the tag name of the token, then pop |
||
| 2531 | elements from the stack until that element has been popped from |
||
| 2532 | the stack, and clear the list of active formatting elements up |
||
| 2533 | to the last marker. */ |
||
| 2534 | for($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
| 2535 | if($this->stack[$n]->nodeName === $token['name']) { |
||
| 2536 | $n = -1; |
||
| 2537 | } |
||
| 2538 | |||
| 2539 | array_pop($this->stack); |
||
| 2540 | } |
||
| 2541 | |||
| 2542 | $marker = end(array_keys($this->a_formatting, self::MARKER, true)); |
||
| 2543 | |||
| 2544 | for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { |
||
| 2545 | array_pop($this->a_formatting); |
||
| 2546 | } |
||
| 2547 | } |
||
| 2548 | break; |
||
| 2549 | |||
| 2550 | /* Or an end tag whose tag name is one of: "area", "basefont", |
||
| 2551 | "bgsound", "br", "embed", "hr", "iframe", "image", "img", |
||
| 2552 | "input", "isindex", "noembed", "noframes", "param", "select", |
||
| 2553 | "spacer", "table", "textarea", "wbr" */ |
||
| 2554 | case 'area': case 'basefont': case 'bgsound': case 'br': |
||
| 2555 | case 'embed': case 'hr': case 'iframe': case 'image': |
||
| 2556 | case 'img': case 'input': case 'isindex': case 'noembed': |
||
| 2557 | case 'noframes': case 'param': case 'select': case 'spacer': |
||
| 2558 | case 'table': case 'textarea': case 'wbr': |
||
| 2559 | // Parse error. Ignore the token. |
||
| 2560 | break; |
||
| 2561 | |||
| 2562 | /* An end tag token not covered by the previous entries */ |
||
| 2563 | default: |
||
| 2564 | for($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
| 2565 | /* Initialise node to be the current node (the bottommost |
||
| 2566 | node of the stack). */ |
||
| 2567 | $node = end($this->stack); |
||
| 2568 | |||
| 2569 | /* If node has the same tag name as the end tag token, |
||
| 2570 | then: */ |
||
| 2571 | if($token['name'] === $node->nodeName) { |
||
| 2572 | /* Generate implied end tags. */ |
||
| 2573 | $this->generateImpliedEndTags(); |
||
| 2574 | |||
| 2575 | /* If the tag name of the end tag token does not |
||
| 2576 | match the tag name of the current node, this is a |
||
| 2577 | parse error. */ |
||
| 2578 | // k |
||
| 2579 | |||
| 2580 | /* Pop all the nodes from the current node up to |
||
| 2581 | node, including node, then stop this algorithm. */ |
||
| 2582 | for($x = count($this->stack) - $n; $x >= $n; $x--) { |
||
| 2583 | array_pop($this->stack); |
||
| 2584 | } |
||
| 2585 | |||
| 2586 | } else { |
||
| 2587 | $category = $this->getElementCategory($node); |
||
| 2588 | |||
| 2589 | if($category !== self::SPECIAL && $category !== self::SCOPING) { |
||
| 2590 | /* Otherwise, if node is in neither the formatting |
||
| 2591 | category nor the phrasing category, then this is a |
||
| 2592 | parse error. Stop this algorithm. The end tag token |
||
| 2593 | is ignored. */ |
||
| 2594 | return false; |
||
| 2595 | } |
||
| 2596 | } |
||
| 2597 | } |
||
| 2598 | break; |
||
| 2599 | } |
||
| 2600 | break; |
||
| 2601 | } |
||
| 2602 | } |
||
| 2603 | |||
/**
 * Processes one token while the insertion mode is "in table".
 *
 * @param array $token Token array. 'type' is always read; 'name' is read for
 *                     start/end tags and 'data' for character/comment tokens.
 * @return mixed false when a </table> end tag (real or implied) is ignored
 *               because no table element is in table scope; otherwise the
 *               result of the handler the token was forwarded to, or null.
 */
private function inTable($token)
{
    $clear = array('html', 'table');

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    if ($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $text = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($text);

    /* A comment token */
    } elseif ($token['type'] === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($comment);

    /* A start tag whose tag name is "caption" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'caption') {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert a marker at the end of the list of active
        formatting elements. */
        $this->a_formatting[] = self::MARKER;

        /* Insert an HTML element for the token, then switch the
        insertion mode to "in caption". */
        $this->insertElement($token);
        $this->mode = self::IN_CAPTION;

    /* A start tag whose tag name is "colgroup" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'colgroup') {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the
        insertion mode to "in column group". */
        $this->insertElement($token);
        $this->mode = self::IN_CGROUP;

    /* A start tag whose tag name is "col" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'col') {
        /* Act as if a start tag token with the tag name "colgroup" had
        been seen, then reprocess the current token. */
        $this->inTable(array(
            'name' => 'colgroup',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        ));

        $this->inColumnGroup($token);

    /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)) {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in table body". */
        $this->insertElement($token);
        $this->mode = self::IN_TBODY;

    /* A start tag whose tag name is one of: "td", "th", "tr" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        in_array($token['name'], array('td', 'th', 'tr'), true)) {
        /* Act as if a start tag token with the tag name "tbody" had been
        seen, then reprocess the current token. */
        $this->inTable(array(
            'name' => 'tbody',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        ));

        return $this->inTableBody($token);

    /* A start tag whose tag name is "table" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'table') {
        /* Parse error. Act as if an end tag token with the tag name "table"
        had been seen, then, if that token wasn't ignored, reprocess the
        current token. */
        $closed = $this->inTable(array(
            'name' => 'table',
            'type' => HTML5::ENDTAG
        ));

        /* The end-tag branch below returns false only when it ignored the
        token. In that case nothing changed, so reprocessing would hand
        the same token back in the same state; drop it instead. */
        if ($closed === false) {
            return false;
        }

        return $this->mainPhase($token);

    /* An end tag whose tag name is "table" */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'table') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if (!$this->elementInScope($token['name'], true)) {
            return false;
        }

        /* Generate implied end tags. */
        $this->generateImpliedEndTags();

        /* Now, if the current node is not a table element, then this
        is a parse error (not reported). */

        /* Pop elements from this stack until a table element has been
        popped from the stack. */
        do {
            $current = end($this->stack)->nodeName;
            array_pop($this->stack);
        } while ($current !== 'table');

        /* Reset the insertion mode appropriately. */
        $this->resetInsertionMode();

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
        'tfoot', 'th', 'thead', 'tr'), true)) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* Parse error. Process the token as if the insertion mode was "in
        body", with the following exception: */

        /* If the current node is a table, tbody, tfoot, thead, or tr
        element, then, whenever a node would be inserted into the current
        node, it must instead be inserted into the foster parent element. */
        if (in_array(end($this->stack)->nodeName,
            array('table', 'tbody', 'tfoot', 'thead', 'tr'), true)) {
            /* Locate the last (bottommost) table element on the stack. */
            $table = null;
            $table_index = -1;

            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
                if ($this->stack[$n]->nodeName === 'table') {
                    $table = $this->stack[$n];
                    $table_index = $n;
                    break;
                }
            }

            if ($table === null) {
                /* No table element on the stack (innerHTML case): the
                foster parent is the first element in the stack of open
                elements (the html element). */
                $this->foster_parent = $this->stack[0];

            } elseif ($table->parentNode !== null &&
                $table->parentNode->nodeType === XML_ELEMENT_NODE) {
                /* The table has a parent *element*: that is the foster
                parent. The element check must happen here; testing only
                for a non-null parent would also accept non-element
                parents and make the fallback below unreachable for them. */
                $this->foster_parent = $table->parentNode;

            } else {
                /* The table has no parent, or its parent is not an
                element: use the element before the table in the stack
                of open elements. */
                $this->foster_parent = $this->stack[$table_index - 1];
            }
        }

        $this->inBody($token);
    }
}
||
| 2781 | |||
/**
 * Processes one token while the insertion mode is "in caption".
 *
 * @param array $token Token array. 'type' is always read; 'name' is read for
 *                     start/end tags.
 * @return mixed The result of inTable() when the token is reprocessed there;
 *               null in every other case.
 */
private function inCaption($token)
{
    /* An end tag whose tag name is "caption" */
    if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if ($this->elementInScope($token['name'], true)) {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* Now, if the current node is not a caption element, then this
            is a parse error (not reported). */

            /* Pop elements from this stack until a caption element has
            been popped from the stack. */
            do {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);
            } while ($node !== 'caption');

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in table". */
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
    name is "table" */
    } elseif (($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'), true)) || ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'table')) {
        /* Parse error. Act as if an end tag with the tag name "caption"
        had been seen, then, if that token wasn't ignored, reprocess the
        current token. The implied end tag is ignored exactly when no
        caption is in table scope, so test that up front and drop the
        token in that case instead of reprocessing it. */
        if ($this->elementInScope('caption', true)) {
            $this->inCaption(array(
                'name' => 'caption',
                'type' => HTML5::ENDTAG
            ));

            return $this->inTable($token);
        }

    /* An end tag whose tag name is one of: "body", "col", "colgroup",
    "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot',
        'th', 'thead', 'tr'), true)) {
        // Parse error. Ignore the token. ("td" belongs in this list; without
        // it a stray </td> would be handed to the "in body" rules.)

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
||
| 2850 | |||
/**
 * Processes one token while the insertion mode is "in column group".
 *
 * @param array $token Token array. 'type' is always read; 'name' is read for
 *                     start/end tags and 'data' for character/comment tokens.
 * @return mixed The result of inTable() when the token is reprocessed there;
 *               null in every other case.
 */
private function inColumnGroup($token)
{
    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    if ($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $text = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($text);

    /* A comment token */
    } elseif ($token['type'] === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($comment);

    /* A start tag whose tag name is "col" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
        /* Insert a col element for the token. Immediately pop the current
        node off the stack of open elements. */
        $this->insertElement($token);
        array_pop($this->stack);

    /* An end tag whose tag name is "colgroup" */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'colgroup') {
        /* If the current node is the root html element, then this is a
        parse error, ignore the token. (innerHTML case) Otherwise, pop the
        current node (which will be a colgroup element) from the stack of
        open elements and switch the insertion mode to "in table". */
        if (end($this->stack)->nodeName !== 'html') {
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* An end tag whose tag name is "col" */
    } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Act as if an end tag with the tag name "colgroup" had been seen,
        and then, if that token wasn't ignored, reprocess the current token.
        The implied end tag is ignored exactly when the current node is the
        html element (see the "colgroup" end-tag branch above), so in that
        case the current token is dropped rather than reprocessed. */
        if (end($this->stack)->nodeName !== 'html') {
            $this->inColumnGroup(array(
                'name' => 'colgroup',
                'type' => HTML5::ENDTAG
            ));

            return $this->inTable($token);
        }
    }
}
||
| 2908 | |||
/**
 * Processes one token while the insertion mode is "in table body".
 *
 * @param array $token Token array. 'type' is always read; 'name' is read for
 *                     start/end tags.
 * @return mixed The result of inRow() or mainPhase() when the token is
 *               reprocessed there; null in every other case.
 */
private function inTableBody($token)
{
    $clear = array('tbody', 'tfoot', 'thead', 'html');

    /* A start tag whose tag name is "tr" */
    if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
        /* Clear the stack back to a table body context. */
        $this->clearStackToTableContext($clear);

        /* Insert a tr element for the token, then switch the insertion
        mode to "in row". */
        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    /* A start tag whose tag name is one of: "th", "td" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')) {
        /* Parse error. Act as if a start tag with the tag name "tr" had
        been seen, then reprocess the current token. */
        $this->inTableBody(array(
            'name' => 'tr',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        ));

        return $this->inRow($token);

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if ($this->elementInScope($token['name'], true)) {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node from the stack of open elements. Switch
            the insertion mode to "in table". */
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", or an end tag whose tag name is "table".
    (The name list must spell "tfoot" correctly, and the "table" half of
    the condition must test for an END tag, as stated here.) */
    } elseif (($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'), true)) ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
        /* If the stack of open elements does not have a tbody, thead, or
        tfoot element in table scope, this is a parse error. Ignore the
        token. (innerHTML case) */
        if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Act as if an end tag with the same tag name as the current
            node ("tbody", "tfoot", or "thead") had been seen, then
            reprocess the current token. */
            $this->inTableBody(array(
                'name' => end($this->stack)->nodeName,
                'type' => HTML5::ENDTAG
            ));

            return $this->mainPhase($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'),
        true)) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
||
| 2995 | |||
/**
 * Processes one token while the insertion mode is "in row".
 *
 * @param array $token Token array. 'type' is always read; 'name' is read for
 *                     start/end tags.
 * @return mixed The result of inTableBody() when the token is reprocessed
 *               there; null in every other case.
 */
private function inRow($token)
{
    $clear = array('tr', 'html');

    /* A start tag whose tag name is one of: "th", "td" */
    if ($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')) {
        /* Clear the stack back to a table row context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in cell". */
        $this->insertElement($token);
        $this->mode = self::IN_CELL;

        /* Insert a marker at the end of the list of active formatting
        elements. */
        $this->a_formatting[] = self::MARKER;

    /* An end tag whose tag name is "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if ($this->elementInScope($token['name'], true)) {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node (which will be a tr element) from the
            stack of open elements. Switch the insertion mode to "in table
            body". */
            array_pop($this->stack);
            $this->mode = self::IN_TBODY;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
    } elseif (($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'),
        true)) || ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'table')) {
        /* Act as if an end tag with the tag name "tr" had been seen, then,
        if that token wasn't ignored, reprocess the current token. The
        implied </tr> is ignored exactly when no tr is in table scope, so
        the token is dropped in that case. */
        if ($this->elementInScope('tr', true)) {
            $this->inRow(array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            ));

            /* The implied </tr> switched the mode to "in table body", so
            the token is reprocessed there. The "in cell" rules would
            discard it, because no td/th is open any more. */
            return $this->inTableBody($token);
        }

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if ($this->elementInScope($token['name'], true)) {
            /* Otherwise, act as if an end tag with the tag name "tr" had
            been seen, then reprocess the current token under the "in
            table body" rules, which is the mode the implied </tr>
            switches to. */
            $this->inRow(array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            ));

            return $this->inTableBody($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'),
        true)) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
||
| 3081 | |||
/**
 * Processes one token while the insertion mode is "in cell".
 *
 * @param array $token Token array. 'type' is always read; 'name' is read for
 *                     start/end tags.
 * @return mixed The result of inRow() when the cell is closed and the token
 *               is reprocessed there; null in every other case.
 */
private function inCell($token)
{
    /* An end tag whose tag name is one of: "td", "th" */
    if ($token['type'] === HTML5::ENDTAG &&
        ($token['name'] === 'td' || $token['name'] === 'th')) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token, then this is a
        parse error and the token must be ignored. */
        if ($this->elementInScope($token['name'], true)) {
            /* Generate implied end tags, except for elements with the same
            tag name as the token. */
            $this->generateImpliedEndTags(array($token['name']));

            /* Now, if the current node is not an element with the same tag
            name as the token, then this is a parse error (not reported). */

            /* Pop elements from this stack until an element with the same
            tag name as the token has been popped from the stack. */
            do {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);
            } while ($node !== $token['name']);

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in row". (The current node
            will be a tr element at this point.) */
            $this->mode = self::IN_ROW;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($token['type'] === HTML5::STARTTAG && in_array($token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr'), true)) {
        /* If the stack of open elements does not have a td or th element
        in table scope, then this is a parse error; ignore the token.
        (innerHTML case) Otherwise, close the cell and reprocess the
        current token. */
        if ($this->elementInScope(array('td', 'th'), true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html'), true)) {
        /* Parse error. Ignore the token. */

    /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
    "thead", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'],
        array('table', 'tbody', 'tfoot', 'thead', 'tr'), true)) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token (which can only
        happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
        then this is a parse error and the token must be ignored.
        Otherwise, close the cell and reprocess the current token. */
        if ($this->elementInScope($token['name'], true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
||
| 3189 | |||
| 3190 | private function inSelect($token) |
||
| 3191 | { |
||
| 3192 | /* Handle the token as follows: */ |
||
| 3193 | |||
| 3194 | /* A character token */ |
||
| 3195 | if($token['type'] === HTML5::CHARACTR) { |
||
| 3196 | /* Append the token's character to the current node. */ |
||
| 3197 | $this->insertText($token['data']); |
||
| 3198 | |||
| 3199 | /* A comment token */ |
||
| 3200 | } elseif($token['type'] === HTML5::COMMENT) { |
||
| 3201 | /* Append a Comment node to the current node with the data |
||
| 3202 | attribute set to the data given in the comment token. */ |
||
| 3203 | $this->insertComment($token['data']); |
||
| 3204 | |||
| 3205 | /* A start tag token whose tag name is "option" */ |
||
| 3206 | } elseif($token['type'] === HTML5::STARTTAG && |
||
| 3207 | $token['name'] === 'option') { |
||
| 3208 | /* If the current node is an option element, act as if an end tag |
||
| 3209 | with the tag name "option" had been seen. */ |
||
| 3210 | if(end($this->stack)->nodeName === 'option') { |
||
| 3211 | $this->inSelect(array( |
||
| 3212 | 'name' => 'option', |
||
| 3213 | 'type' => HTML5::ENDTAG |
||
| 3214 | )); |
||
| 3215 | } |
||
| 3216 | |||
| 3217 | /* Insert an HTML element for the token. */ |
||
| 3218 | $this->insertElement($token); |
||
| 3219 | |||
| 3220 | /* A start tag token whose tag name is "optgroup" */ |
||
| 3221 | } elseif($token['type'] === HTML5::STARTTAG && |
||
| 3222 | $token['name'] === 'optgroup') { |
||
| 3223 | /* If the current node is an option element, act as if an end tag |
||
| 3224 | with the tag name "option" had been seen. */ |
||
| 3225 | if(end($this->stack)->nodeName === 'option') { |
||
| 3226 | $this->inSelect(array( |
||
| 3227 | 'name' => 'option', |
||
| 3228 | 'type' => HTML5::ENDTAG |
||
| 3229 | )); |
||
| 3230 | } |
||
| 3231 | |||
| 3232 | /* If the current node is an optgroup element, act as if an end tag |
||
| 3233 | with the tag name "optgroup" had been seen. */ |
||
| 3234 | if(end($this->stack)->nodeName === 'optgroup') { |
||
| 3235 | $this->inSelect(array( |
||
| 3236 | 'name' => 'optgroup', |
||
| 3237 | 'type' => HTML5::ENDTAG |
||
| 3238 | )); |
||
| 3239 | } |
||
| 3240 | |||
| 3241 | /* Insert an HTML element for the token. */ |
||
| 3242 | $this->insertElement($token); |
||
| 3243 | |||
| 3244 | /* An end tag token whose tag name is "optgroup" */ |
||
| 3245 | } elseif($token['type'] === HTML5::ENDTAG && |
||
| 3246 | $token['name'] === 'optgroup') { |
||
| 3247 | /* First, if the current node is an option element, and the node |
||
| 3248 | immediately before it in the stack of open elements is an optgroup |
||
| 3249 | element, then act as if an end tag with the tag name "option" had |
||
| 3250 | been seen. */ |
||
| 3251 | $elements_in_stack = count($this->stack); |
||
| 3252 | |||
| 3253 | if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && |
||
| 3254 | $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { |
||
| 3255 | $this->inSelect(array( |
||
| 3256 | 'name' => 'option', |
||
| 3257 | 'type' => HTML5::ENDTAG |
||
| 3258 | )); |
||
| 3259 | } |
||
| 3260 | |||
| 3261 | /* If the current node is an optgroup element, then pop that node |
||
| 3262 | from the stack of open elements. Otherwise, this is a parse error, |
||
| 3263 | ignore the token. */ |
||
| 3264 | if($this->stack[$elements_in_stack - 1] === 'optgroup') { |
||
| 3265 | array_pop($this->stack); |
||
| 3266 | } |
||
| 3267 | |||
| 3268 | /* An end tag token whose tag name is "option" */ |
||
| 3269 | } elseif($token['type'] === HTML5::ENDTAG && |
||
| 3270 | $token['name'] === 'option') { |
||
| 3271 | /* If the current node is an option element, then pop that node |
||
| 3272 | from the stack of open elements. Otherwise, this is a parse error, |
||
| 3273 | ignore the token. */ |
||
| 3274 | if(end($this->stack)->nodeName === 'option') { |
||
| 3275 | array_pop($this->stack); |
||
| 3276 | } |
||
| 3277 | |||
| 3278 | /* An end tag whose tag name is "select" */ |
||
| 3279 | } elseif($token['type'] === HTML5::ENDTAG && |
||
| 3280 | $token['name'] === 'select') { |
||
| 3281 | /* If the stack of open elements does not have an element in table |
||
| 3282 | scope with the same tag name as the token, this is a parse error. |
||
| 3283 | Ignore the token. (innerHTML case) */ |
||
| 3284 | if(!$this->elementInScope($token['name'], true)) { |
||
| 3285 | // w/e |
||
| 3286 | |||
| 3287 | /* Otherwise: */ |
||
| 3288 | } else { |
||
| 3289 | /* Pop elements from the stack of open elements until a select |
||
| 3290 | element has been popped from the stack. */ |
||
| 3291 | while(true) { |
||
| 3292 | $current = end($this->stack)->nodeName; |
||
| 3293 | array_pop($this->stack); |
||
| 3294 | |||
| 3295 | if($current === 'select') { |
||
| 3296 | break; |
||
| 3297 | } |
||
| 3298 | } |
||
| 3299 | |||
| 3300 | /* Reset the insertion mode appropriately. */ |
||
| 3301 | $this->resetInsertionMode(); |
||
| 3302 | } |
||
| 3303 | |||
| 3304 | /* A start tag whose tag name is "select" */ |
||
| 3305 | } elseif($token['name'] === 'select' && |
||
| 3306 | $token['type'] === HTML5::STARTTAG) { |
||
| 3307 | /* Parse error. Act as if the token had been an end tag with the |
||
| 3308 | tag name "select" instead. */ |
||
| 3309 | $this->inSelect(array( |
||
| 3310 | 'name' => 'select', |
||
| 3311 | 'type' => HTML5::ENDTAG |
||
| 3312 | )); |
||
| 3313 | |||
| 3314 | /* An end tag whose tag name is one of: "caption", "table", "tbody", |
||
| 3315 | "tfoot", "thead", "tr", "td", "th" */ |
||
| 3316 | } elseif(in_array($token['name'], array('caption', 'table', 'tbody', |
||
| 3317 | 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { |
||
| 3318 | /* Parse error. */ |
||
| 3319 | // w/e |
||
| 3320 | |||
| 3321 | /* If the stack of open elements has an element in table scope with |
||
| 3322 | the same tag name as that of the token, then act as if an end tag |
||
| 3323 | with the tag name "select" had been seen, and reprocess the token. |
||
| 3324 | Otherwise, ignore the token. */ |
||
| 3325 | if($this->elementInScope($token['name'], true)) { |
||
| 3326 | $this->inSelect(array( |
||
| 3327 | 'name' => 'select', |
||
| 3328 | 'type' => HTML5::ENDTAG |
||
| 3329 | )); |
||
| 3330 | |||
| 3331 | $this->mainPhase($token); |
||
| 3332 | } |
||
| 3333 | |||
| 3334 | /* Anything else */ |
||
| 3335 | } else { |
||
| 3336 | /* Parse error. Ignore the token. */ |
||
| 3337 | } |
||
| 3338 | } |
||
| 3339 | |||
/**
 * Handles one token while the insertion mode is "after body".
 *
 * @param array $token Token array; 'type' is always read, 'data' is read
 *                     for character/comment tokens and 'name' for end tags.
 * @return mixed Null for tokens handled here; for every other token the
 *               result of reprocessing it through inBody().
 */
private function afterBody($token)
{
    $type = $token['type'];

    /* A character token made up solely of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE: handled exactly as it would be "in body". */
    if ($type === HTML5::CHARACTR
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->inBody($token);
        return;
    }

    /* A comment token: the Comment node goes on the first element of the
    stack of open elements (the html element), not on the current node. */
    if ($type === HTML5::COMMENT) {
        $commentNode = $this->dom->createComment($token['data']);
        $this->stack[0]->appendChild($commentNode);
        return;
    }

    /* An end tag with the tag name "html": switch to the trailing end
    phase. (The innerHTML case described by the spec is not special-cased
    here.) */
    if ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        $this->phase = self::END_PHASE;
        return;
    }

    /* Anything else: parse error. Fall back to "in body" and reprocess
    the token there. */
    $this->mode = self::IN_BODY;

    return $this->inBody($token);
}
||
| 3379 | |||
/**
 * Handles one token while the insertion mode is "in frameset".
 *
 * Whitespace text and comments are inserted, frameset/frame start tags
 * create elements, a frameset end tag pops the current node, a noframes
 * start tag is delegated to inBody(), and everything else is ignored.
 *
 * @param array $token Token array; 'type' is always read, 'data' for
 *                     character/comment tokens, 'name' only for tag tokens.
 * @return void
 */
private function inFrameset($token)
{
    $type    = $token['type'];
    $isStart = ($type === HTML5::STARTTAG);
    $isEnd   = ($type === HTML5::ENDTAG);

    /* Only tag tokens are asked for a name; reading 'name' on other token
    kinds would hit an undefined index before the type check could reject
    the token. */
    $name = ($isStart || $isEnd) ? $token['name'] : null;

    /* A whitespace-only character token.
    NOTE(review): the spec text this method was written against also lists
    U+000D CARRIAGE RETURN, which the pattern does not match -- presumably
    CR is normalised before tokens reach this point; confirm. */
    if ($type === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* A start tag with the tag name "frameset" */
    } elseif ($isStart && $name === 'frameset') {
        $this->insertElement($token);

    /* An end tag with the tag name "frameset" */
    } elseif ($isEnd && $name === 'frameset') {
        /* If the current node is the root html element, then this is a
        parse error; ignore the token. (innerHTML case) */
        if (end($this->stack)->nodeName !== 'html') {
            /* Otherwise, pop the current node from the stack of open
            elements. */
            array_pop($this->stack);

            /* Change the insertion mode to "after frameset" only once the
            current node is no longer a frameset element; while an outer
            frameset is still open, its remaining children must keep being
            processed in this mode. */
            $current = end($this->stack);

            if ($current === false || $current->nodeName !== 'frameset') {
                $this->mode = self::AFTR_FRAME;
            }
        }

    /* A start tag with the tag name "frame" */
    } elseif ($isStart && $name === 'frame') {
        /* Insert an HTML element for the token. */
        $this->insertElement($token);

        /* Immediately pop the current node off the stack of open elements. */
        array_pop($this->stack);

    /* A start tag with the tag name "noframes" */
    } elseif ($isStart && $name === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);
    }

    /* Anything else: parse error. Ignore the token. */
}
||
| 3443 | |||
/**
 * Handles one token while the insertion mode is "after frameset".
 *
 * @param array $token Token array; 'type' is always read, 'data' for
 *                     character/comment tokens, 'name' only for tag tokens.
 * @return void
 */
private function afterFrameset($token)
{
    $type = $token['type'];

    /* A whitespace-only character token.
    NOTE(review): the spec text this method was written against also lists
    U+000D CARRIAGE RETURN, which the pattern does not match -- presumably
    CR is normalised before tokens reach this point; confirm. */
    if ($type === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag with the tag name "html". The type is tested first so
    that 'name' is never read on tokens that do not carry one. */
    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        /* Switch to the trailing end phase. */
        $this->phase = self::END_PHASE;

    /* A start tag with the tag name "noframes" */
    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);
    }

    /* Anything else: parse error. Ignore the token. */
}
||
| 3479 | |||
/**
 * Processes one token emitted after the main phase has ended.
 *
 * DOCTYPE and end-of-file tokens are ignored, comments are appended to the
 * document itself, whitespace is forwarded to the main phase, and any other
 * character data or tag switches the parser back to the main phase.
 *
 * @param array $token Token array; 'type' is always read, 'data' for
 *                     character/comment tokens.
 * @return mixed Result of mainPhase() when the parser switches back to the
 *               main phase; null otherwise.
 */
private function trailingEndPhase($token)
{
    $type = $token['type'];

    /* A DOCTYPE token */
    if ($type === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.
        return;
    }

    /* A comment token */
    if ($type === HTML5::COMMENT) {
        /* Append a Comment node to the Document object with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);
        return;
    }

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    if ($type === HTML5::CHARACTR &&
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Process the token as it would be processed in the main phase,
        without leaving the trailing end phase. */
        $this->mainPhase($token);
        return;
    }

    /* A character token that is NOT whitespace-only (whitespace was fully
    handled by the branch above, so any character token reaching this
    point qualifies), or a start tag token, or an end tag token. */
    if ($type === HTML5::CHARACTR ||
    $type === HTML5::STARTTAG || $type === HTML5::ENDTAG) {
        /* Parse error. Switch back to the main phase and reprocess the
        token. */
        $this->phase = self::MAIN_PHASE;
        return $this->mainPhase($token);
    }

    /* An end-of-file token (or any other token type): nothing to do. */
}
||
| 3520 | |||
/**
 * Creates an element for a tag token, attaches it to the tree and pushes it
 * onto the stack of open elements.
 *
 * @param array $token  Tag token; 'name' gives the element name and 'attr'
 *                      is iterated as a list of arrays with 'name' and
 *                      'value' keys.
 * @param bool  $append Not read anywhere in this method.
 * @return mixed The element object returned by $this->dom->createElement().
 */
private function insertElement($token, $append = true)
{
    $element = $this->dom->createElement($token['name']);

    foreach ($token['attr'] as $attribute) {
        /* When the same attribute name occurs more than once, the first
        occurrence wins and later ones are dropped. */
        if ($element->hasAttribute($attribute['name'])) {
            continue;
        }

        $element->setAttribute($attribute['name'], $attribute['value']);
    }

    /* Attach first, then make the element the new current node. */
    $this->appendToRealParent($element);
    $this->stack[] = $element;

    return $element;
}
||
| 3536 | |||
/**
 * Creates a text node holding $data and attaches it via
 * appendToRealParent().
 *
 * @param string $data Character data for the new text node.
 * @return void
 */
private function insertText($data)
{
    $this->appendToRealParent($this->dom->createTextNode($data));
}
||
| 3542 | |||
/**
 * Creates a comment node holding $data and attaches it via
 * appendToRealParent().
 *
 * @param string $data Content of the new comment node.
 * @return void
 */
private function insertComment($data)
{
    $this->appendToRealParent($this->dom->createComment($data));
}
||
| 3548 | |||
/**
 * Attaches $node either to the current node or, when a foster parent is
 * pending, to that foster parent.
 *
 * The foster parent is consumed by the call: it is reset to null once the
 * node has been placed.
 *
 * @param mixed $node Node object to attach.
 * @return void
 */
private function appendToRealParent($node)
{
    /* Ordinary case: no foster parenting requested, so the node simply
    becomes the last child of the current node. */
    if ($this->foster_parent === null) {
        end($this->stack)->appendChild($node);
        return;
    }

    /* Walk the stack of open elements from the current node upwards and
    pick the last table element that is attached to a parent. */
    $table = null;

    for ($index = count($this->stack) - 1; $index >= 0; $index--) {
        $candidate = $this->stack[$index];

        if ($candidate->nodeName === 'table' && $candidate->parentNode !== null) {
            $table = $candidate;
            break;
        }
    }

    /* If the foster parent is the parent of that table, the node goes
    immediately before the table; otherwise it is appended to the foster
    parent. */
    if ($table !== null && $this->foster_parent->isSameNode($table->parentNode)) {
        $this->foster_parent->insertBefore($node, $table);
    } else {
        $this->foster_parent->appendChild($node);
    }

    $this->foster_parent = null;
}
||
| 3577 | |||
/**
 * Tells whether the stack of open elements has an element in scope.
 *
 * The stack is walked from the current node towards the root. The walk ends
 * in a match when the target is found, and in a failure when a scope
 * boundary is reached first.
 *
 * @param string|array $el    Tag name to look for, or a list of tag names
 *                            (true as soon as any one of them is in scope).
 * @param bool         $table Pass true for the "has an element in table
 *                            scope" variant, where only table and the root
 *                            element act as boundaries.
 * @return bool True when a matching element is in scope, false otherwise.
 */
private function elementInScope($el, $table = false)
{
    if (is_array($el)) {
        foreach ($el as $element) {
            if ($this->elementInScope($element, $table)) {
                return true;
            }
        }

        return false;
    }

    /* 1. Initialise node to be the current node (the bottommost node of
    the stack). */
    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        $node = $this->stack[$n];

        /* 2. If node is the target node, terminate in a match state. */
        if ($node->tagName === $el) {
            return true;
        }

        /* 3. Otherwise, if node is a table element, terminate in a failure
        state. */
        if ($node->tagName === 'table') {
            return false;
        }

        /* 4. Otherwise, if the algorithm is the "has an element in scope"
        variant (rather than the "has an element in table scope" variant),
        and node is one of the following, terminate in a failure state.
        These extra boundaries must NOT apply to the table-scope variant. */
        if ($table !== true && in_array($node->tagName, array('caption', 'td',
        'th', 'button', 'marquee', 'object'), true)) {
            return false;
        }

        /* 5. Otherwise, if node is an html element (root element), terminate
        in a failure state. */
        if ($node === $node->ownerDocument->documentElement) {
            return false;
        }

        /* Otherwise, set node to the previous entry in the stack of open
        elements and return to step 2. */
    }

    /* Stack exhausted (or empty) without a match. */
    return false;
}
||
| 3627 | |||
/**
 * Reconstructs the active formatting elements.
 *
 * Entries at the end of the list of active formatting elements that are
 * neither markers nor currently on the stack of open elements are cloned,
 * appended to the current node, pushed onto the stack and substituted for
 * the original entries in the list.
 *
 * @return mixed False when there is nothing to reconstruct, null otherwise.
 */
private function reconstructActiveFormattingElements()
{
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $formatting_elements = count($this->a_formatting);

    if ($formatting_elements === 0) {
        return false;
    }

    /* 3. Let entry be the last (most recently added) element in the list
    of active formatting elements. */
    $entry = end($this->a_formatting);

    /* 2. If the last (most recently added) entry in the list of active
    formatting elements is a marker, or if it is an element that is in the
    stack of open elements, then there is nothing to reconstruct; stop this
    algorithm. */
    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    /* Rewind phase (steps 4-6). $a is the index of entry in the list.
    $step_seven records whether step 7 has to run before the first clone:
    it must when the rewind stopped on a marker or an open element (that
    entry itself must not be cloned), and it must not when the rewind ran
    off the start of the list. */
    $a = $formatting_elements - 1;
    $step_seven = true;

    while (true) {
        /* 4. If there are no entries before entry in the list of active
        formatting elements, then jump to step 8. */
        if ($a === 0) {
            $step_seven = false;
            break;
        }

        /* 5. Let entry be the entry one earlier than entry in the list of
        active formatting elements. */
        $a--;
        $entry = $this->a_formatting[$a];

        /* 6. If entry is neither a marker nor an element that is also in
        the stack of open elements, go to step 4. */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            break;
        }
    }

    /* Advance phase (steps 7-11). */
    while (true) {
        /* 7. Let entry be the element one later than entry in the list of
        active formatting elements. */
        if ($step_seven) {
            $a++;
            $entry = $this->a_formatting[$a];
        }

        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $entry->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        end($this->stack)->appendChild($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;

        /* 11. If the entry for clone in the list of active formatting
        elements is not the last entry in the list, return to step 7. */
        if (end($this->a_formatting) === $clone) {
            break;
        }

        $step_seven = true;
    }
}
||
| 3699 | |||
/**
 * Clears the list of active formatting elements up to the last marker.
 *
 * Entries are removed from the end of the list until a marker has been
 * removed. If the list holds no marker it is emptied completely instead of
 * looping forever.
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker()
{
    while (count($this->a_formatting) > 0) {
        /* 1. Let entry be the last (most recently added) entry in the list
        of active formatting elements.
        2. Remove entry from the list of active formatting elements. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
||
| 3721 | |||
/**
 * Generates implied end tags.
 *
 * While the current node is a dd, dt, li, p, td, th or tr element that is
 * not listed in $exclude, it is popped from the stack of open elements.
 *
 * @param array $exclude Tag names that must not be closed implicitly.
 * @return void
 */
private function generateImpliedEndTags(array $exclude = array())
{
    $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    /* end() yields false once the stack is empty; stop there rather than
    reading a property of a non-object. */
    while (($current = end($this->stack)) !== false &&
    in_array($current->nodeName, $elements, true)) {
        array_pop($this->stack);
    }
}
||
| 3736 | |||
/**
 * Classifies a tag name as special, scoping, formatting or phrasing.
 *
 * The lists are consulted in that order, so a name present in more than one
 * list is reported under the first one that contains it; a name found in
 * none of them is phrasing.
 *
 * @param string $name Tag name to classify.
 * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING or
 *               self::PHRASING.
 */
private function getElementCategory($name)
{
    if (in_array($name, $this->special)) {
        return self::SPECIAL;
    }

    if (in_array($name, $this->scoping)) {
        return self::SCOPING;
    }

    if (in_array($name, $this->formatting)) {
        return self::FORMATTING;
    }

    return self::PHRASING;
}
||
| 3751 | |||
/**
 * Clears the stack of open elements back to a table context.
 *
 * Elements are popped until the current node's name is one of $elements.
 *
 * @param array $elements Node names at which popping stops.
 * @return void
 */
private function clearStackToTableContext($elements)
{
    /* Pop for as long as the current node is not one of the permitted
    context elements; the first permitted node stays on the stack. */
    while (!in_array(end($this->stack)->nodeName, $elements)) {
        array_pop($this->stack);
    }
}
||
| 3769 | |||
/**
 * Resets the insertion mode appropriately.
 *
 * The stack of open elements is walked from the current node towards the
 * first node; the first node whose name determines an insertion mode sets
 * $this->mode and ends the walk.
 *
 * @return void
 */
private function resetInsertionMode()
{
    /* Steps 4-13: node names that map directly onto an insertion mode.
    Note that head maps to "in body" ("in body"! not "in head"!). */
    $mode_for = array(
        'select'   => self::IN_SELECT,
        'td'       => self::IN_CELL,
        'th'       => self::IN_CELL,
        'tr'       => self::IN_ROW,
        'tbody'    => self::IN_TBODY,
        'thead'    => self::IN_TBODY,
        'tfoot'    => self::IN_TBODY,
        'caption'  => self::IN_CAPTION,
        'colgroup' => self::IN_CGROUP,
        'table'    => self::IN_TABLE,
        'head'     => self::IN_BODY,
        'body'     => self::IN_BODY,
        'frameset' => self::IN_FRAME,
    );

    /* 1. Let last be false. */
    $last = false;

    /* 2. Let node be the last node in the stack of open elements; each
    further pass moves one entry towards the start of the stack. */
    for ($depth = count($this->stack) - 1; $depth >= 0; $depth--) {
        $node = $this->stack[$depth];
        $name = $node->nodeName;

        /* 3. If node is the first node in the stack of open elements, then
        set last to true. (The innerHTML context-element substitution that
        the spec describes is not performed here.) */
        if ($this->stack[0]->isSameNode($node)) {
            $last = true;
        }

        /* 4.-13. Direct name-to-mode matches. */
        if (array_key_exists($name, $mode_for)) {
            $this->mode = $mode_for[$name];
            return;
        }

        /* 14. If node is an html element, then: if the head element
        pointer is null, switch the insertion mode to "before head",
        otherwise, switch the insertion mode to "after head". */
        if ($name === 'html') {
            if ($this->head_pointer === null) {
                $this->mode = self::BEFOR_HEAD;
            } else {
                $this->mode = self::AFTER_HEAD;
            }

            return;
        }

        /* 15. If last is true, then set the insertion mode to "in body"
        and abort these steps. */
        if ($last) {
            $this->mode = self::IN_BODY;
            return;
        }
    }
}
||
| 3868 | |||
/**
 * Closes the currently open table cell, if there is one.
 *
 * If the stack of open elements has a td element in table scope, or failing
 * that a th element, an end tag token with that name is fed to inCell().
 *
 * @return void
 */
private function closeCell()
{
    /* td is probed first; th is only probed when no td is in scope. */
    if ($this->elementInScope('td', true)) {
        $cell = 'td';
    } elseif ($this->elementInScope('th', true)) {
        $cell = 'th';
    } else {
        return;
    }

    $this->inCell(array(
        'name' => $cell,
        'type' => HTML5::ENDTAG
    ));
}
||
| 3884 | |||
/**
 * Returns the document object ($this->dom) that this tree constructor
 * creates its nodes with.
 *
 * @return mixed The value of the private $dom property.
 */
public function save()
{
    return $this->dom;
}
||
| 3890 |