Total Complexity | 571 |
Total Lines | 3210 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like HTML5TreeConstructer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HTML5TreeConstructer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
1577 | class HTML5TreeConstructer |
||
1578 | { |
||
1579 | public $stack = array(); |
||
1580 | |||
1581 | private $phase; |
||
1582 | private $mode; |
||
1583 | private $dom; |
||
1584 | private $foster_parent = null; |
||
1585 | private $a_formatting = array(); |
||
1586 | |||
1587 | private $head_pointer = null; |
||
1588 | private $form_pointer = null; |
||
1589 | |||
1590 | private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th'); |
||
1591 | private $formatting = array( |
||
1592 | 'a', |
||
1593 | 'b', |
||
1594 | 'big', |
||
1595 | 'em', |
||
1596 | 'font', |
||
1597 | 'i', |
||
1598 | 'nobr', |
||
1599 | 's', |
||
1600 | 'small', |
||
1601 | 'strike', |
||
1602 | 'strong', |
||
1603 | 'tt', |
||
1604 | 'u' |
||
1605 | ); |
||
1606 | private $special = array( |
||
1607 | 'address', |
||
1608 | 'area', |
||
1609 | 'base', |
||
1610 | 'basefont', |
||
1611 | 'bgsound', |
||
1612 | 'blockquote', |
||
1613 | 'body', |
||
1614 | 'br', |
||
1615 | 'center', |
||
1616 | 'col', |
||
1617 | 'colgroup', |
||
1618 | 'dd', |
||
1619 | 'dir', |
||
1620 | 'div', |
||
1621 | 'dl', |
||
1622 | 'dt', |
||
1623 | 'embed', |
||
1624 | 'fieldset', |
||
1625 | 'form', |
||
1626 | 'frame', |
||
1627 | 'frameset', |
||
1628 | 'h1', |
||
1629 | 'h2', |
||
1630 | 'h3', |
||
1631 | 'h4', |
||
1632 | 'h5', |
||
1633 | 'h6', |
||
1634 | 'head', |
||
1635 | 'hr', |
||
1636 | 'iframe', |
||
1637 | 'image', |
||
1638 | 'img', |
||
1639 | 'input', |
||
1640 | 'isindex', |
||
1641 | 'li', |
||
1642 | 'link', |
||
1643 | 'listing', |
||
1644 | 'menu', |
||
1645 | 'meta', |
||
1646 | 'noembed', |
||
1647 | 'noframes', |
||
1648 | 'noscript', |
||
1649 | 'ol', |
||
1650 | 'optgroup', |
||
1651 | 'option', |
||
1652 | 'p', |
||
1653 | 'param', |
||
1654 | 'plaintext', |
||
1655 | 'pre', |
||
1656 | 'script', |
||
1657 | 'select', |
||
1658 | 'spacer', |
||
1659 | 'style', |
||
1660 | 'tbody', |
||
1661 | 'textarea', |
||
1662 | 'tfoot', |
||
1663 | 'thead', |
||
1664 | 'title', |
||
1665 | 'tr', |
||
1666 | 'ul', |
||
1667 | 'wbr' |
||
1668 | ); |
||
1669 | |||
1670 | // The different phases. |
||
1671 | const INIT_PHASE = 0; |
||
1672 | const ROOT_PHASE = 1; |
||
1673 | const MAIN_PHASE = 2; |
||
1674 | const END_PHASE = 3; |
||
1675 | |||
1676 | // The different insertion modes for the main phase. |
||
1677 | const BEFOR_HEAD = 0; |
||
1678 | const IN_HEAD = 1; |
||
1679 | const AFTER_HEAD = 2; |
||
1680 | const IN_BODY = 3; |
||
1681 | const IN_TABLE = 4; |
||
1682 | const IN_CAPTION = 5; |
||
1683 | const IN_CGROUP = 6; |
||
1684 | const IN_TBODY = 7; |
||
1685 | const IN_ROW = 8; |
||
1686 | const IN_CELL = 9; |
||
1687 | const IN_SELECT = 10; |
||
1688 | const AFTER_BODY = 11; |
||
1689 | const IN_FRAME = 12; |
||
1690 | const AFTR_FRAME = 13; |
||
1691 | |||
1692 | // The different types of elements. |
||
1693 | const SPECIAL = 0; |
||
1694 | const SCOPING = 1; |
||
1695 | const FORMATTING = 2; |
||
1696 | const PHRASING = 3; |
||
1697 | |||
1698 | const MARKER = 0; |
||
1699 | |||
/**
 * Puts the tree constructor into its starting state and creates the DOM
 * document that the handlers in this class append nodes to.
 */
public function __construct()
{
    // Configure the output document before storing it on the instance.
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->strictErrorChecking = false;
    $document->substituteEntities = true;
    $document->preserveWhiteSpace = true;
    $this->dom = $document;

    // Start in the initial phase with the "before head" insertion mode.
    $this->mode = self::BEFOR_HEAD;
    $this->phase = self::INIT_PHASE;
}
||
1711 | |||
1712 | // Process tag tokens |
||
/**
 * Routes a token to the handler for the current tree-construction phase.
 *
 * @param array $token Token to process; passed through to the handler as-is.
 * @return mixed Whatever the selected phase handler returns, or null when
 *               the current phase has no handler.
 */
public function emitToken($token)
{
    // Phase constant => name of the private method that handles it.
    $handlers = array(
        self::INIT_PHASE => 'initPhase',
        self::ROOT_PHASE => 'rootElementPhase',
        self::MAIN_PHASE => 'mainPhase',
        self::END_PHASE  => 'trailingEndPhase',
    );

    if (!isset($handlers[$this->phase])) {
        return null;
    }

    $handler = $handlers[$this->phase];

    return $this->$handler($token);
}
||
1730 | |||
/**
 * Handles a token while the tree constructor is in the initial phase.
 *
 * - A token flagged as erroneous, a comment, start tag, end tag, EOF, or a
 *   character token that is not pure whitespace switches to the root element
 *   phase and is reprocessed there.
 * - A token carrying a false 'error' flag (a correct DOCTYPE) appends a
 *   doctype node to the document and switches to the root element phase.
 * - Whitespace-only data is appended to the document as a text node.
 *
 * @param array $token Token from the tokeniser. 'type' is read; 'error' and
 *                     'data' are read only when present.
 * @return mixed Result of rootElementPhase() when the token is reprocessed,
 *               otherwise null.
 */
private function initPhase($token)
{
    /* U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), U+000B LINE
    TABULATION, U+000C FORM FEED (FF), or U+0020 SPACE, one or more times. */
    $whitespace = '/^[\t\n\x0b\x0c ]+$/';

    /* A DOCTYPE token that is marked as being in error
    A comment token
    A start tag token
    An end tag token
    A character token that is not one of the whitespace characters above
    An end-of-file token */
    if ((isset($token['error']) && $token['error']) ||
        $token['type'] === HTML5::COMMENT ||
        $token['type'] === HTML5::STARTTAG ||
        $token['type'] === HTML5::ENDTAG ||
        $token['type'] === HTML5::EOF ||
        ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
            !preg_match($whitespace, $token['data']))
    ) {
        /* This specification does not define how to handle this case. In
        particular, user agents may ignore the entirety of this specification
        altogether for such documents, and instead invoke special parse modes
        with a greater emphasis on backwards compatibility. */

        $this->phase = self::ROOT_PHASE;
        return $this->rootElementPhase($token);

        /* A DOCTYPE token marked as being correct */
    } elseif (isset($token['error']) && !$token['error']) {
        /* Append a DocumentType node to the Document node, with the name
        attribute set to "HTML" and the public/system identifiers left empty.
        DOMDocumentType cannot be built with `new`; the node has to come from
        DOMImplementation::createDocumentType(), and it must actually be
        appended (previously it was created and then discarded). */
        $implementation = new DOMImplementation();
        $doctype = $implementation->createDocumentType('HTML');

        if ($doctype instanceof DOMDocumentType) {
            $this->dom->appendChild($doctype);
        }

        /* Then, switch to the root element phase of the tree construction
        stage. */
        $this->phase = self::ROOT_PHASE;

        /* A character token that consists only of whitespace characters */
    } elseif (isset($token['data']) && preg_match($whitespace, $token['data'])) {
        /* Append that character to the Document node. */
        $text = $this->dom->createTextNode($token['data']);
        $this->dom->appendChild($text);
    }
}
||
1785 | |||
/**
 * Handles a token in the root element phase.
 *
 * DOCTYPE tokens are ignored. Comments and whitespace-only character data
 * are appended directly to the document. A non-whitespace character token,
 * start tag, end tag or EOF creates the root `html` element, pushes it on
 * the stack of open elements, switches to the main phase and reprocesses
 * the token there. Any other token type is ignored.
 *
 * @param array $token Token to process; 'type' is always read, 'data' is
 *                     read for comment and character tokens.
 * @return mixed Result of mainPhase() when the token is reprocessed,
 *               otherwise null.
 */
private function rootElementPhase($token)
{
    $kind = $token['type'];

    // A DOCTYPE token: parse error, ignore it.
    if ($kind === HTML5::DOCTYPE) {
        return null;
    }

    // A comment token: becomes a Comment child of the document.
    if ($kind === HTML5::COMMENT) {
        $this->dom->appendChild($this->dom->createComment($token['data']));
        return null;
    }

    if ($kind === HTML5::CHARACTR) {
        // Tab, LF, vertical tab, form feed and space only: keep as text.
        if (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $this->dom->appendChild($this->dom->createTextNode($token['data']));
            return null;
        }
        // Any other character data falls through to root creation below.
    } elseif ($kind !== HTML5::STARTTAG &&
        $kind !== HTML5::ENDTAG &&
        $kind !== HTML5::EOF
    ) {
        // Not a token type this phase acts on.
        return null;
    }

    // Create the root html element, append it to the document and open it.
    $root = $this->dom->createElement('html');
    $this->dom->appendChild($root);
    $this->stack[] = $root;

    // Switch to the main phase and reprocess the current token.
    $this->phase = self::MAIN_PHASE;

    return $this->mainPhase($token);
}
||
1835 | |||
/**
 * Handles a token in the main phase.
 *
 * DOCTYPE tokens are ignored, a repeated `html` start tag only contributes
 * attributes missing from the root element, EOF generates implied end tags,
 * and every other token is dispatched on the current insertion mode.
 *
 * @param array $token Token to process; 'type' is always read, 'name' and
 *                     'attr' are read for start tags.
 * @return mixed Result of the insertion-mode handler, or null when the token
 *               is handled here or the mode has no handler.
 */
private function mainPhase($token)
{
    /* Tokens in the main phase must be handled as follows: */

    /* A DOCTYPE token */
    if ($token['type'] === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.

        /* A start tag token with the tag name "html" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
        /* If this start tag token was not the first start tag token, then
        it is a parse error. */

        /* For each attribute on the token, check to see if the attribute
        is already present on the top element of the stack of open elements.
        If it is not, add the attribute and its corresponding value to that
        element. */
        foreach ($token['attr'] as $attr) {
            if (!$this->stack[0]->hasAttribute($attr['name'])) {
                $this->stack[0]->setAttribute($attr['name'], $attr['value']);
            }
        }

        /* An end-of-file token */
    } elseif ($token['type'] === HTML5::EOF) {
        /* Generate implied end tags. */
        $this->generateImpliedEndTags();

        /* Anything else. */
    } else {
        /* Depends on the insertion mode. The former `case self::END_PHASE`
        entry was dropped: END_PHASE is a phase constant whose value (3)
        equals IN_BODY, so the IN_BODY case always matched first and that
        entry could never run. */
        switch ($this->mode) {
            case self::BEFOR_HEAD:
                return $this->beforeHead($token);
            case self::IN_HEAD:
                return $this->inHead($token);
            case self::AFTER_HEAD:
                return $this->afterHead($token);
            case self::IN_BODY:
                return $this->inBody($token);
            case self::IN_TABLE:
                return $this->inTable($token);
            case self::IN_CAPTION:
                return $this->inCaption($token);
            case self::IN_CGROUP:
                return $this->inColumnGroup($token);
            case self::IN_TBODY:
                return $this->inTableBody($token);
            case self::IN_ROW:
                return $this->inRow($token);
            case self::IN_CELL:
                return $this->inCell($token);
            case self::IN_SELECT:
                return $this->inSelect($token);
            case self::AFTER_BODY:
                return $this->afterBody($token);
            case self::IN_FRAME:
                return $this->inFrameset($token);
            case self::AFTR_FRAME:
                return $this->afterFrameset($token);
        }
    }
}
||
1916 | |||
/**
 * Handles a token in the "before head" insertion mode.
 *
 * Whitespace and comments are inserted at the current node. A `head` start
 * tag inserts the element, records it as the head element pointer and
 * switches to "in head". Any other start tag, an `html` end tag, or
 * non-whitespace character data first synthesises a `head` start tag and
 * then reprocesses the token via inHead(). Other end tags are ignored.
 *
 * @param array $token Token to process; 'type' is always read, 'name' for
 *                     tags and 'data' for character/comment tokens.
 * @return mixed Result of inHead() when the token is reprocessed, otherwise
 *               null.
 */
private function beforeHead($token)
{
    /* U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), U+000B LINE
    TABULATION, U+000C FORM FEED (FF), or U+0020 SPACE, one or more times.
    One shared pattern keeps the "is whitespace" and "is not whitespace"
    tests below in agreement (the latter previously lacked the `+`). */
    $whitespace = '/^[\t\n\x0b\x0c ]+$/';

    /* A character token consisting only of whitespace */
    if ($token['type'] === HTML5::CHARACTR &&
        preg_match($whitespace, $token['data'])
    ) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

        /* A comment token */
    } elseif ($token['type'] === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data attribute
        set to the data given in the comment token. */
        $this->insertComment($token['data']);

        /* A start tag token with the tag name "head" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
        /* Create an element for the token, append the new element to the
        current node and push it onto the stack of open elements. */
        $element = $this->insertElement($token);

        /* Set the head element pointer to this new element node. */
        $this->head_pointer = $element;

        /* Change the insertion mode to "in head". */
        $this->mode = self::IN_HEAD;

        /* Any other start tag token. Or an end tag with the tag name "html".
        Or a character token that is not purely whitespace. */
    } elseif ($token['type'] === HTML5::STARTTAG ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
        ($token['type'] === HTML5::CHARACTR &&
            !preg_match($whitespace, $token['data']))
    ) {
        /* Act as if a start tag token with the tag name "head" and no
        attributes had been seen, then reprocess the current token. */
        $this->beforeHead(
            array(
                'name' => 'head',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inHead($token);

        /* Any other end tag */
    } elseif ($token['type'] === HTML5::ENDTAG) {
        /* Parse error. Ignore the token. */
    }
}
||
1977 | |||
/**
 * Handles a token in the "in head" insertion mode.
 *
 * The head element pointer may be null (the branches below call this the
 * innerHTML case), so every use of it is guarded: new elements are then
 * inserted at the current node instead, and the "current node is the head
 * element" checks are treated as false.
 *
 * @param array $token Token to process; 'type' is always read, 'name' for
 *                     tags and 'data' for character/comment tokens.
 * @return mixed A tokeniser content-model constant (HTML5::PCDATA,
 *               HTML5::RCDATA or HTML5::CDATA) where one is returned below,
 *               the result of afterHead() when the token is reprocessed,
 *               otherwise null.
 */
private function inHead($token)
{
    /* Handle the token as follows: */

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE.

    THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
    or script element, append the character to the current node regardless
    of its content. */
    if (($token['type'] === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
            $token['type'] === HTML5::CHARACTR && in_array(
                end($this->stack)->nodeName,
                array('title', 'style', 'script')
            ))
    ) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

        /* A comment token */
    } elseif ($token['type'] === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data attribute
        set to the data given in the comment token. */
        $this->insertComment($token['data']);

        /* An end tag closing a title, style or script element: pop it and
        hand the tokeniser back the PCDATA content model. */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('title', 'style', 'script'))
    ) {
        array_pop($this->stack);
        return HTML5::PCDATA;

        /* A start tag with the tag name "title" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* Switch the tokeniser's content model flag to the RCDATA state. */
        return HTML5::RCDATA;

        /* A start tag with the tag name "style" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* Switch the tokeniser's content model flag to the CDATA state. */
        return HTML5::CDATA;

        /* A start tag with the tag name "script" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
        /* Create an element for the token. As for title and style, fall
        back to the current node when there is no head element pointer
        rather than calling appendChild() on null. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* Switch the tokeniser's content model flag to the CDATA state. */
        return HTML5::CDATA;

        /* A start tag with the tag name "base", "link", or "meta" */
    } elseif ($token['type'] === HTML5::STARTTAG && in_array(
            $token['name'],
            array('base', 'link', 'meta')
        )
    ) {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);
            array_pop($this->stack);

        } else {
            $this->insertElement($token);
        }

        /* An end tag with the tag name "head" */
    } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
        /* If the current node is a head element, pop the current node off
        the stack of open elements. Otherwise, this is a parse error and
        nothing is popped. */
        if ($this->head_pointer !== null &&
            $this->head_pointer->isSameNode(end($this->stack))
        ) {
            array_pop($this->stack);
        }

        /* Change the insertion mode to "after head". */
        $this->mode = self::AFTER_HEAD;

        /* A start tag with the tag name "head" or an end tag except "html". */
    } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')
    ) {
        // Parse error. Ignore the token.

        /* Anything else */
    } else {
        /* If the current node is a head element, act as if an end tag
        token with the tag name "head" had been seen. */
        if ($this->head_pointer !== null &&
            $this->head_pointer->isSameNode(end($this->stack))
        ) {
            $this->inHead(
                array(
                    'name' => 'head',
                    'type' => HTML5::ENDTAG
                )
            );

            /* Otherwise, change the insertion mode to "after head". */
        } else {
            $this->mode = self::AFTER_HEAD;
        }

        /* Then, reprocess the current token. */
        return $this->afterHead($token);
    }
}
||
2112 | |||
/**
 * Handles a token in the "after head" insertion mode.
 *
 * Whitespace and comments are inserted at the current node. `body` and
 * `frameset` start tags insert their element and switch to the matching
 * insertion mode. Head-only start tags switch back to "in head" and are
 * reprocessed there. Everything else implies a `body` start tag and is then
 * reprocessed via inBody().
 *
 * @param array $token Token to process; 'type' is always read, 'name' for
 *                     start tags and 'data' for character/comment tokens.
 * @return mixed Result of inHead() or inBody() when the token is
 *               reprocessed, otherwise null.
 */
private function afterHead($token)
{
    $kind = $token['type'];

    // Tab, LF, vertical tab, form feed and space only: append as text.
    if ($kind === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);
        return null;
    }

    // Comment token: append a Comment node to the current node.
    if ($kind === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return null;
    }

    if ($kind === HTML5::STARTTAG) {
        $tag = $token['name'];

        // <body>: insert it and continue "in body".
        if ($tag === 'body') {
            $this->insertElement($token);
            $this->mode = self::IN_BODY;
            return null;
        }

        // <frameset>: insert it and continue "in frameset".
        if ($tag === 'frameset') {
            $this->insertElement($token);
            $this->mode = self::IN_FRAME;
            return null;
        }

        // Head-only elements after </head>: parse error; go back to
        // "in head" and reprocess the token there.
        if (in_array($tag, array('base', 'link', 'meta', 'script', 'style', 'title'))) {
            $this->mode = self::IN_HEAD;
            return $this->inHead($token);
        }
    }

    // Anything else: act as if a <body> start tag with no attributes had
    // been seen, then reprocess the current token "in body".
    $impliedBody = array(
        'name' => 'body',
        'type' => HTML5::STARTTAG,
        'attr' => array()
    );
    $this->afterHead($impliedBody);

    return $this->inBody($token);
}
||
2175 | |||
2176 | private function inBody($token) |
||
2177 | { |
||
2178 | /* Handle the token as follows: */ |
||
2179 | |||
2180 | switch ($token['type']) { |
||
2181 | /* A character token */ |
||
2182 | case HTML5::CHARACTR: |
||
2183 | /* Reconstruct the active formatting elements, if any. */ |
||
2184 | $this->reconstructActiveFormattingElements(); |
||
2185 | |||
2186 | /* Append the token's character to the current node. */ |
||
2187 | $this->insertText($token['data']); |
||
2188 | break; |
||
2189 | |||
2190 | /* A comment token */ |
||
2191 | case HTML5::COMMENT: |
||
2192 | /* Append a Comment node to the current node with the data |
||
2193 | attribute set to the data given in the comment token. */ |
||
2194 | $this->insertComment($token['data']); |
||
2195 | break; |
||
2196 | |||
2197 | case HTML5::STARTTAG: |
||
2198 | switch ($token['name']) { |
||
2199 | /* A start tag token whose tag name is one of: "script", |
||
2200 | "style" */ |
||
2201 | case 'script': |
||
2202 | case 'style': |
||
2203 | /* Process the token as if the insertion mode had been "in |
||
2204 | head". */ |
||
2205 | return $this->inHead($token); |
||
2206 | break; |
||
2207 | |||
2208 | /* A start tag token whose tag name is one of: "base", "link", |
||
2209 | "meta", "title" */ |
||
2210 | case 'base': |
||
2211 | case 'link': |
||
2212 | case 'meta': |
||
2213 | case 'title': |
||
2214 | /* Parse error. Process the token as if the insertion mode |
||
2215 | had been "in head". */ |
||
2216 | return $this->inHead($token); |
||
2217 | break; |
||
2218 | |||
2219 | /* A start tag token with the tag name "body" */ |
||
2220 | case 'body': |
||
2221 | /* Parse error. If the second element on the stack of open |
||
2222 | elements is not a body element, or, if the stack of open |
||
2223 | elements has only one node on it, then ignore the token. |
||
2224 | (innerHTML case) */ |
||
2225 | if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { |
||
2226 | // Ignore |
||
2227 | |||
2228 | /* Otherwise, for each attribute on the token, check to see |
||
2229 | if the attribute is already present on the body element (the |
||
2230 | second element) on the stack of open elements. If it is not, |
||
2231 | add the attribute and its corresponding value to that |
||
2232 | element. */ |
||
2233 | } else { |
||
2234 | foreach ($token['attr'] as $attr) { |
||
2235 | if (!$this->stack[1]->hasAttribute($attr['name'])) { |
||
2236 | $this->stack[1]->setAttribute($attr['name'], $attr['value']); |
||
2237 | } |
||
2238 | } |
||
2239 | } |
||
2240 | break; |
||
2241 | |||
2242 | /* A start tag whose tag name is one of: "address", |
||
2243 | "blockquote", "center", "dir", "div", "dl", "fieldset", |
||
2244 | "listing", "menu", "ol", "p", "ul" */ |
||
2245 | case 'address': |
||
2246 | case 'blockquote': |
||
2247 | case 'center': |
||
2248 | case 'dir': |
||
2249 | case 'div': |
||
2250 | case 'dl': |
||
2251 | case 'fieldset': |
||
2252 | case 'listing': |
||
2253 | case 'menu': |
||
2254 | case 'ol': |
||
2255 | case 'p': |
||
2256 | case 'ul': |
||
2257 | /* If the stack of open elements has a p element in scope, |
||
2258 | then act as if an end tag with the tag name p had been |
||
2259 | seen. */ |
||
2260 | if ($this->elementInScope('p')) { |
||
2261 | $this->emitToken( |
||
2262 | array( |
||
2263 | 'name' => 'p', |
||
2264 | 'type' => HTML5::ENDTAG |
||
2265 | ) |
||
2266 | ); |
||
2267 | } |
||
2268 | |||
2269 | /* Insert an HTML element for the token. */ |
||
2270 | $this->insertElement($token); |
||
2271 | break; |
||
2272 | |||
2273 | /* A start tag whose tag name is "form" */ |
||
2274 | case 'form': |
||
2275 | /* If the form element pointer is not null, ignore the |
||
2276 | token with a parse error. */ |
||
2277 | if ($this->form_pointer !== null) { |
||
2278 | // Ignore. |
||
2279 | |||
2280 | /* Otherwise: */ |
||
2281 | } else { |
||
2282 | /* If the stack of open elements has a p element in |
||
2283 | scope, then act as if an end tag with the tag name p |
||
2284 | had been seen. */ |
||
2285 | if ($this->elementInScope('p')) { |
||
2286 | $this->emitToken( |
||
2287 | array( |
||
2288 | 'name' => 'p', |
||
2289 | 'type' => HTML5::ENDTAG |
||
2290 | ) |
||
2291 | ); |
||
2292 | } |
||
2293 | |||
2294 | /* Insert an HTML element for the token, and set the |
||
2295 | form element pointer to point to the element created. */ |
||
2296 | $element = $this->insertElement($token); |
||
2297 | $this->form_pointer = $element; |
||
2298 | } |
||
2299 | break; |
||
2300 | |||
2301 | /* A start tag whose tag name is "li", "dd" or "dt" */ |
||
2302 | case 'li': |
||
2303 | case 'dd': |
||
2304 | case 'dt': |
||
2305 | /* If the stack of open elements has a p element in scope, |
||
2306 | then act as if an end tag with the tag name p had been |
||
2307 | seen. */ |
||
2308 | if ($this->elementInScope('p')) { |
||
2309 | $this->emitToken( |
||
2310 | array( |
||
2311 | 'name' => 'p', |
||
2312 | 'type' => HTML5::ENDTAG |
||
2313 | ) |
||
2314 | ); |
||
2315 | } |
||
2316 | |||
2317 | $stack_length = count($this->stack) - 1; |
||
2318 | |||
2319 | for ($n = $stack_length; 0 <= $n; $n--) { |
||
2320 | /* 1. Initialise node to be the current node (the |
||
2321 | bottommost node of the stack). */ |
||
2322 | $stop = false; |
||
2323 | $node = $this->stack[$n]; |
||
2324 | $cat = $this->getElementCategory($node->tagName); |
||
2325 | |||
2326 | /* 2. If node is an li, dd or dt element, then pop all |
||
2327 | the nodes from the current node up to node, including |
||
2328 | node, then stop this algorithm. */ |
||
2329 | if ($token['name'] === $node->tagName || ($token['name'] !== 'li' |
||
2330 | && ($node->tagName === 'dd' || $node->tagName === 'dt')) |
||
2331 | ) { |
||
2332 | for ($x = $stack_length; $x >= $n; $x--) { |
||
2333 | array_pop($this->stack); |
||
2334 | } |
||
2335 | |||
2336 | break; |
||
2337 | } |
||
2338 | |||
2339 | /* 3. If node is not in the formatting category, and is |
||
2340 | not in the phrasing category, and is not an address or |
||
2341 | div element, then stop this algorithm. */ |
||
2342 | if ($cat !== self::FORMATTING && $cat !== self::PHRASING && |
||
2343 | $node->tagName !== 'address' && $node->tagName !== 'div' |
||
2344 | ) { |
||
2345 | break; |
||
2346 | } |
||
2347 | } |
||
2348 | |||
2349 | /* Finally, insert an HTML element with the same tag |
||
2350 | name as the token's. */ |
||
2351 | $this->insertElement($token); |
||
2352 | break; |
||
2353 | |||
2354 | /* A start tag token whose tag name is "plaintext" */ |
||
2355 | case 'plaintext': |
||
2356 | /* If the stack of open elements has a p element in scope, |
||
2357 | then act as if an end tag with the tag name p had been |
||
2358 | seen. */ |
||
2359 | if ($this->elementInScope('p')) { |
||
2360 | $this->emitToken( |
||
2361 | array( |
||
2362 | 'name' => 'p', |
||
2363 | 'type' => HTML5::ENDTAG |
||
2364 | ) |
||
2365 | ); |
||
2366 | } |
||
2367 | |||
2368 | /* Insert an HTML element for the token. */ |
||
2369 | $this->insertElement($token); |
||
2370 | |||
2371 | return HTML5::PLAINTEXT; |
||
2372 | break; |
||
2373 | |||
2374 | /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", |
||
2375 | "h5", "h6" */ |
||
2376 | case 'h1': |
||
2377 | case 'h2': |
||
2378 | case 'h3': |
||
2379 | case 'h4': |
||
2380 | case 'h5': |
||
2381 | case 'h6': |
||
2382 | /* If the stack of open elements has a p element in scope, |
||
2383 | then act as if an end tag with the tag name p had been seen. */ |
||
2384 | if ($this->elementInScope('p')) { |
||
2385 | $this->emitToken( |
||
2386 | array( |
||
2387 | 'name' => 'p', |
||
2388 | 'type' => HTML5::ENDTAG |
||
2389 | ) |
||
2390 | ); |
||
2391 | } |
||
2392 | |||
2393 | /* If the stack of open elements has in scope an element whose |
||
2394 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then |
||
2395 | this is a parse error; pop elements from the stack until an |
||
2396 | element with one of those tag names has been popped from the |
||
2397 | stack. */ |
||
2398 | while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { |
||
2399 | array_pop($this->stack); |
||
2400 | } |
||
2401 | |||
2402 | /* Insert an HTML element for the token. */ |
||
2403 | $this->insertElement($token); |
||
2404 | break; |
||
2405 | |||
2406 | /* A start tag whose tag name is "a" */ |
||
2407 | case 'a': |
||
2408 | /* If the list of active formatting elements contains |
||
2409 | an element whose tag name is "a" between the end of the |
||
2410 | list and the last marker on the list (or the start of |
||
2411 | the list if there is no marker on the list), then this |
||
2412 | is a parse error; act as if an end tag with the tag name |
||
2413 | "a" had been seen, then remove that element from the list |
||
2414 | of active formatting elements and the stack of open |
||
2415 | elements if the end tag didn't already remove it (it |
||
2416 | might not have if the element is not in table scope). */ |
||
2417 | $leng = count($this->a_formatting); |
||
2418 | |||
2419 | for ($n = $leng - 1; $n >= 0; $n--) { |
||
2420 | if ($this->a_formatting[$n] === self::MARKER) { |
||
2421 | break; |
||
2422 | |||
2423 | } elseif ($this->a_formatting[$n]->nodeName === 'a') { |
||
2424 | $this->emitToken( |
||
2425 | array( |
||
2426 | 'name' => 'a', |
||
2427 | 'type' => HTML5::ENDTAG |
||
2428 | ) |
||
2429 | ); |
||
2430 | break; |
||
2431 | } |
||
2432 | } |
||
2433 | |||
2434 | /* Reconstruct the active formatting elements, if any. */ |
||
2435 | $this->reconstructActiveFormattingElements(); |
||
2436 | |||
2437 | /* Insert an HTML element for the token. */ |
||
2438 | $el = $this->insertElement($token); |
||
2439 | |||
2440 | /* Add that element to the list of active formatting |
||
2441 | elements. */ |
||
2442 | $this->a_formatting[] = $el; |
||
2443 | break; |
||
2444 | |||
2445 | /* A start tag whose tag name is one of: "b", "big", "em", "font", |
||
2446 | "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ |
||
2447 | case 'b': |
||
2448 | case 'big': |
||
2449 | case 'em': |
||
2450 | case 'font': |
||
2451 | case 'i': |
||
2452 | case 'nobr': |
||
2453 | case 's': |
||
2454 | case 'small': |
||
2455 | case 'strike': |
||
2456 | case 'strong': |
||
2457 | case 'tt': |
||
2458 | case 'u': |
||
2459 | /* Reconstruct the active formatting elements, if any. */ |
||
2460 | $this->reconstructActiveFormattingElements(); |
||
2461 | |||
2462 | /* Insert an HTML element for the token. */ |
||
2463 | $el = $this->insertElement($token); |
||
2464 | |||
2465 | /* Add that element to the list of active formatting |
||
2466 | elements. */ |
||
2467 | $this->a_formatting[] = $el; |
||
2468 | break; |
||
2469 | |||
2470 | /* A start tag token whose tag name is "button" */ |
||
2471 | case 'button': |
||
2472 | /* If the stack of open elements has a button element in scope, |
||
2473 | then this is a parse error; act as if an end tag with the tag |
||
2474 | name "button" had been seen, then reprocess the token. (We don't |
||
2475 | do that. Unnecessary.) */ |
||
2476 | if ($this->elementInScope('button')) { |
||
2477 | $this->inBody( |
||
2478 | array( |
||
2479 | 'name' => 'button', |
||
2480 | 'type' => HTML5::ENDTAG |
||
2481 | ) |
||
2482 | ); |
||
2483 | } |
||
2484 | |||
2485 | /* Reconstruct the active formatting elements, if any. */ |
||
2486 | $this->reconstructActiveFormattingElements(); |
||
2487 | |||
2488 | /* Insert an HTML element for the token. */ |
||
2489 | $this->insertElement($token); |
||
2490 | |||
2491 | /* Insert a marker at the end of the list of active |
||
2492 | formatting elements. */ |
||
2493 | $this->a_formatting[] = self::MARKER; |
||
2494 | break; |
||
2495 | |||
2496 | /* A start tag token whose tag name is one of: "marquee", "object" */ |
||
2497 | case 'marquee': |
||
2498 | case 'object': |
||
2499 | /* Reconstruct the active formatting elements, if any. */ |
||
2500 | $this->reconstructActiveFormattingElements(); |
||
2501 | |||
2502 | /* Insert an HTML element for the token. */ |
||
2503 | $this->insertElement($token); |
||
2504 | |||
2505 | /* Insert a marker at the end of the list of active |
||
2506 | formatting elements. */ |
||
2507 | $this->a_formatting[] = self::MARKER; |
||
2508 | break; |
||
2509 | |||
2510 | /* A start tag token whose tag name is "xmp" */ |
||
2511 | case 'xmp': |
||
2512 | /* Reconstruct the active formatting elements, if any. */ |
||
2513 | $this->reconstructActiveFormattingElements(); |
||
2514 | |||
2515 | /* Insert an HTML element for the token. */ |
||
2516 | $this->insertElement($token); |
||
2517 | |||
2518 | /* Switch the content model flag to the CDATA state. */ |
||
2519 | return HTML5::CDATA; |
||
2520 | break; |
||
2521 | |||
2522 | /* A start tag whose tag name is "table" */ |
||
2523 | case 'table': |
||
2524 | /* If the stack of open elements has a p element in scope, |
||
2525 | then act as if an end tag with the tag name p had been seen. */ |
||
2526 | if ($this->elementInScope('p')) { |
||
2527 | $this->emitToken( |
||
2528 | array( |
||
2529 | 'name' => 'p', |
||
2530 | 'type' => HTML5::ENDTAG |
||
2531 | ) |
||
2532 | ); |
||
2533 | } |
||
2534 | |||
2535 | /* Insert an HTML element for the token. */ |
||
2536 | $this->insertElement($token); |
||
2537 | |||
2538 | /* Change the insertion mode to "in table". */ |
||
2539 | $this->mode = self::IN_TABLE; |
||
2540 | break; |
||
2541 | |||
2542 | /* A start tag whose tag name is one of: "area", "basefont", |
||
2543 | "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ |
||
2544 | case 'area': |
||
2545 | case 'basefont': |
||
2546 | case 'bgsound': |
||
2547 | case 'br': |
||
2548 | case 'embed': |
||
2549 | case 'img': |
||
2550 | case 'param': |
||
2551 | case 'spacer': |
||
2552 | case 'wbr': |
||
2553 | /* Reconstruct the active formatting elements, if any. */ |
||
2554 | $this->reconstructActiveFormattingElements(); |
||
2555 | |||
2556 | /* Insert an HTML element for the token. */ |
||
2557 | $this->insertElement($token); |
||
2558 | |||
2559 | /* Immediately pop the current node off the stack of open elements. */ |
||
2560 | array_pop($this->stack); |
||
2561 | break; |
||
2562 | |||
2563 | /* A start tag whose tag name is "hr" */ |
||
2564 | case 'hr': |
||
2565 | /* If the stack of open elements has a p element in scope, |
||
2566 | then act as if an end tag with the tag name p had been seen. */ |
||
2567 | if ($this->elementInScope('p')) { |
||
2568 | $this->emitToken( |
||
2569 | array( |
||
2570 | 'name' => 'p', |
||
2571 | 'type' => HTML5::ENDTAG |
||
2572 | ) |
||
2573 | ); |
||
2574 | } |
||
2575 | |||
2576 | /* Insert an HTML element for the token. */ |
||
2577 | $this->insertElement($token); |
||
2578 | |||
2579 | /* Immediately pop the current node off the stack of open elements. */ |
||
2580 | array_pop($this->stack); |
||
2581 | break; |
||
2582 | |||
2583 | /* A start tag whose tag name is "image" */ |
||
2584 | case 'image': |
||
2585 | /* Parse error. Change the token's tag name to "img" and |
||
2586 | reprocess it. (Don't ask.) */ |
||
2587 | $token['name'] = 'img'; |
||
2588 | return $this->inBody($token); |
||
2589 | break; |
||
2590 | |||
2591 | /* A start tag whose tag name is "input" */ |
||
2592 | case 'input': |
||
2593 | /* Reconstruct the active formatting elements, if any. */ |
||
2594 | $this->reconstructActiveFormattingElements(); |
||
2595 | |||
2596 | /* Insert an input element for the token. */ |
||
2597 | $element = $this->insertElement($token, false); |
||
2598 | |||
2599 | /* If the form element pointer is not null, then associate the |
||
2600 | input element with the form element pointed to by the form |
||
2601 | element pointer. */ |
||
2602 | $this->form_pointer !== null |
||
2603 | ? $this->form_pointer->appendChild($element) |
||
2604 | : end($this->stack)->appendChild($element); |
||
2605 | |||
2606 | /* Pop that input element off the stack of open elements. */ |
||
2607 | array_pop($this->stack); |
||
2608 | break; |
||
2609 | |||
2610 | /* A start tag whose tag name is "isindex" */ |
||
2611 | case 'isindex': |
||
2612 | /* Parse error. */ |
||
2613 | // w/e |
||
2614 | |||
2615 | /* If the form element pointer is not null, |
||
2616 | then ignore the token. */ |
||
2617 | if ($this->form_pointer === null) { |
||
2618 | /* Act as if a start tag token with the tag name "form" had |
||
2619 | been seen. */ |
||
2620 | $this->inBody( |
||
2621 | array( |
||
2622 | 'name' => 'body', |
||
2623 | 'type' => HTML5::STARTTAG, |
||
2624 | 'attr' => array() |
||
2625 | ) |
||
2626 | ); |
||
2627 | |||
2628 | /* Act as if a start tag token with the tag name "hr" had |
||
2629 | been seen. */ |
||
2630 | $this->inBody( |
||
2631 | array( |
||
2632 | 'name' => 'hr', |
||
2633 | 'type' => HTML5::STARTTAG, |
||
2634 | 'attr' => array() |
||
2635 | ) |
||
2636 | ); |
||
2637 | |||
2638 | /* Act as if a start tag token with the tag name "p" had |
||
2639 | been seen. */ |
||
2640 | $this->inBody( |
||
2641 | array( |
||
2642 | 'name' => 'p', |
||
2643 | 'type' => HTML5::STARTTAG, |
||
2644 | 'attr' => array() |
||
2645 | ) |
||
2646 | ); |
||
2647 | |||
2648 | /* Act as if a start tag token with the tag name "label" |
||
2649 | had been seen. */ |
||
2650 | $this->inBody( |
||
2651 | array( |
||
2652 | 'name' => 'label', |
||
2653 | 'type' => HTML5::STARTTAG, |
||
2654 | 'attr' => array() |
||
2655 | ) |
||
2656 | ); |
||
2657 | |||
2658 | /* Act as if a stream of character tokens had been seen. */ |
||
2659 | $this->insertText( |
||
2660 | 'This is a searchable index. ' . |
||
2661 | 'Insert your search keywords here: ' |
||
2662 | ); |
||
2663 | |||
2664 | /* Act as if a start tag token with the tag name "input" |
||
2665 | had been seen, with all the attributes from the "isindex" |
||
2666 | token, except with the "name" attribute set to the value |
||
2667 | "isindex" (ignoring any explicit "name" attribute). */ |
||
2668 | $attr = $token['attr']; |
||
2669 | $attr[] = array('name' => 'name', 'value' => 'isindex'); |
||
2670 | |||
2671 | $this->inBody( |
||
2672 | array( |
||
2673 | 'name' => 'input', |
||
2674 | 'type' => HTML5::STARTTAG, |
||
2675 | 'attr' => $attr |
||
2676 | ) |
||
2677 | ); |
||
2678 | |||
2679 | /* Act as if a stream of character tokens had been seen |
||
2680 | (see below for what they should say). */ |
||
2681 | $this->insertText( |
||
2682 | 'This is a searchable index. ' . |
||
2683 | 'Insert your search keywords here: ' |
||
2684 | ); |
||
2685 | |||
2686 | /* Act as if an end tag token with the tag name "label" |
||
2687 | had been seen. */ |
||
2688 | $this->inBody( |
||
2689 | array( |
||
2690 | 'name' => 'label', |
||
2691 | 'type' => HTML5::ENDTAG |
||
2692 | ) |
||
2693 | ); |
||
2694 | |||
2695 | /* Act as if an end tag token with the tag name "p" had |
||
2696 | been seen. */ |
||
2697 | $this->inBody( |
||
2698 | array( |
||
2699 | 'name' => 'p', |
||
2700 | 'type' => HTML5::ENDTAG |
||
2701 | ) |
||
2702 | ); |
||
2703 | |||
2704 | /* Act as if a start tag token with the tag name "hr" had |
||
2705 | been seen. */ |
||
2706 | $this->inBody( |
||
2707 | array( |
||
2708 | 'name' => 'hr', |
||
2709 | 'type' => HTML5::ENDTAG |
||
2710 | ) |
||
2711 | ); |
||
2712 | |||
2713 | /* Act as if an end tag token with the tag name "form" had |
||
2714 | been seen. */ |
||
2715 | $this->inBody( |
||
2716 | array( |
||
2717 | 'name' => 'form', |
||
2718 | 'type' => HTML5::ENDTAG |
||
2719 | ) |
||
2720 | ); |
||
2721 | } |
||
2722 | break; |
||
2723 | |||
2724 | /* A start tag whose tag name is "textarea" */ |
||
2725 | case 'textarea': |
||
2726 | $this->insertElement($token); |
||
2727 | |||
2728 | /* Switch the tokeniser's content model flag to the |
||
2729 | RCDATA state. */ |
||
2730 | return HTML5::RCDATA; |
||
2731 | break; |
||
2732 | |||
2733 | /* A start tag whose tag name is one of: "iframe", "noembed", |
||
2734 | "noframes" */ |
||
2735 | case 'iframe': |
||
2736 | case 'noembed': |
||
2737 | case 'noframes': |
||
2738 | $this->insertElement($token); |
||
2739 | |||
2740 | /* Switch the tokeniser's content model flag to the CDATA state. */ |
||
2741 | return HTML5::CDATA; |
||
2742 | break; |
||
2743 | |||
2744 | /* A start tag whose tag name is "select" */ |
||
2745 | case 'select': |
||
2746 | /* Reconstruct the active formatting elements, if any. */ |
||
2747 | $this->reconstructActiveFormattingElements(); |
||
2748 | |||
2749 | /* Insert an HTML element for the token. */ |
||
2750 | $this->insertElement($token); |
||
2751 | |||
2752 | /* Change the insertion mode to "in select". */ |
||
2753 | $this->mode = self::IN_SELECT; |
||
2754 | break; |
||
2755 | |||
2756 | /* A start or end tag whose tag name is one of: "caption", "col", |
||
2757 | "colgroup", "frame", "frameset", "head", "option", "optgroup", |
||
2758 | "tbody", "td", "tfoot", "th", "thead", "tr". */ |
||
2759 | case 'caption': |
||
2760 | case 'col': |
||
2761 | case 'colgroup': |
||
2762 | case 'frame': |
||
2763 | case 'frameset': |
||
2764 | case 'head': |
||
2765 | case 'option': |
||
2766 | case 'optgroup': |
||
2767 | case 'tbody': |
||
2768 | case 'td': |
||
2769 | case 'tfoot': |
||
2770 | case 'th': |
||
2771 | case 'thead': |
||
2772 | case 'tr': |
||
2773 | // Parse error. Ignore the token. |
||
2774 | break; |
||
2775 | |||
2776 | /* A start or end tag whose tag name is one of: "event-source", |
||
2777 | "section", "nav", "article", "aside", "header", "footer", |
||
2778 | "datagrid", "command" */ |
||
2779 | case 'event-source': |
||
2780 | case 'section': |
||
2781 | case 'nav': |
||
2782 | case 'article': |
||
2783 | case 'aside': |
||
2784 | case 'header': |
||
2785 | case 'footer': |
||
2786 | case 'datagrid': |
||
2787 | case 'command': |
||
2788 | // Work in progress! |
||
2789 | break; |
||
2790 | |||
2791 | /* A start tag token not covered by the previous entries */ |
||
2792 | default: |
||
2793 | /* Reconstruct the active formatting elements, if any. */ |
||
2794 | $this->reconstructActiveFormattingElements(); |
||
2795 | |||
2796 | $this->insertElement($token, true, true); |
||
2797 | break; |
||
2798 | } |
||
2799 | break; |
||
2800 | |||
2801 | case HTML5::ENDTAG: |
||
2802 | switch ($token['name']) { |
||
2803 | /* An end tag with the tag name "body" */ |
||
2804 | case 'body': |
||
2805 | /* If the second element in the stack of open elements is |
||
2806 | not a body element, this is a parse error. Ignore the token. |
||
2807 | (innerHTML case) */ |
||
2808 | if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { |
||
2809 | // Ignore. |
||
2810 | |||
2811 | /* If the current node is not the body element, then this |
||
2812 | is a parse error. */ |
||
2813 | } elseif (end($this->stack)->nodeName !== 'body') { |
||
2814 | // Parse error. |
||
2815 | } |
||
2816 | |||
2817 | /* Change the insertion mode to "after body". */ |
||
2818 | $this->mode = self::AFTER_BODY; |
||
2819 | break; |
||
2820 | |||
2821 | /* An end tag with the tag name "html" */ |
||
2822 | case 'html': |
||
2823 | /* Act as if an end tag with tag name "body" had been seen, |
||
2824 | then, if that token wasn't ignored, reprocess the current |
||
2825 | token. */ |
||
2826 | $this->inBody( |
||
2827 | array( |
||
2828 | 'name' => 'body', |
||
2829 | 'type' => HTML5::ENDTAG |
||
2830 | ) |
||
2831 | ); |
||
2832 | |||
2833 | return $this->afterBody($token); |
||
2834 | break; |
||
2835 | |||
2836 | /* An end tag whose tag name is one of: "address", "blockquote", |
||
2837 | "center", "dir", "div", "dl", "fieldset", "listing", "menu", |
||
2838 | "ol", "pre", "ul" */ |
||
2839 | case 'address': |
||
2840 | case 'blockquote': |
||
2841 | case 'center': |
||
2842 | case 'dir': |
||
2843 | case 'div': |
||
2844 | case 'dl': |
||
2845 | case 'fieldset': |
||
2846 | case 'listing': |
||
2847 | case 'menu': |
||
2848 | case 'ol': |
||
2849 | case 'pre': |
||
2850 | case 'ul': |
||
2851 | /* If the stack of open elements has an element in scope |
||
2852 | with the same tag name as that of the token, then generate |
||
2853 | implied end tags. */ |
||
2854 | if ($this->elementInScope($token['name'])) { |
||
2855 | $this->generateImpliedEndTags(); |
||
2856 | |||
2857 | /* Now, if the current node is not an element with |
||
2858 | the same tag name as that of the token, then this |
||
2859 | is a parse error. */ |
||
2860 | // w/e |
||
2861 | |||
2862 | /* If the stack of open elements has an element in |
||
2863 | scope with the same tag name as that of the token, |
||
2864 | then pop elements from this stack until an element |
||
2865 | with that tag name has been popped from the stack. */ |
||
2866 | for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
2867 | if ($this->stack[$n]->nodeName === $token['name']) { |
||
2868 | $n = -1; |
||
2869 | } |
||
2870 | |||
2871 | array_pop($this->stack); |
||
2872 | } |
||
2873 | } |
||
2874 | break; |
||
2875 | |||
2876 | /* An end tag whose tag name is "form" */ |
||
2877 | case 'form': |
||
2878 | /* If the stack of open elements has an element in scope |
||
2879 | with the same tag name as that of the token, then generate |
||
2880 | implied end tags. */ |
||
2881 | if ($this->elementInScope($token['name'])) { |
||
2882 | $this->generateImpliedEndTags(); |
||
2883 | |||
2884 | } |
||
2885 | |||
2886 | if (end($this->stack)->nodeName !== $token['name']) { |
||
2887 | /* Now, if the current node is not an element with the |
||
2888 | same tag name as that of the token, then this is a parse |
||
2889 | error. */ |
||
2890 | // w/e |
||
2891 | |||
2892 | } else { |
||
2893 | /* Otherwise, if the current node is an element with |
||
2894 | the same tag name as that of the token pop that element |
||
2895 | from the stack. */ |
||
2896 | array_pop($this->stack); |
||
2897 | } |
||
2898 | |||
2899 | /* In any case, set the form element pointer to null. */ |
||
2900 | $this->form_pointer = null; |
||
2901 | break; |
||
2902 | |||
2903 | /* An end tag whose tag name is "p" */ |
||
2904 | case 'p': |
||
2905 | /* If the stack of open elements has a p element in scope, |
||
2906 | then generate implied end tags, except for p elements. */ |
||
2907 | if ($this->elementInScope('p')) { |
||
2908 | $this->generateImpliedEndTags(array('p')); |
||
2909 | |||
2910 | /* If the current node is not a p element, then this is |
||
2911 | a parse error. */ |
||
2912 | // k |
||
2913 | |||
2914 | /* If the stack of open elements has a p element in |
||
2915 | scope, then pop elements from this stack until the stack |
||
2916 | no longer has a p element in scope. */ |
||
2917 | for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
2918 | if ($this->elementInScope('p')) { |
||
2919 | array_pop($this->stack); |
||
2920 | |||
2921 | } else { |
||
2922 | break; |
||
2923 | } |
||
2924 | } |
||
2925 | } |
||
2926 | break; |
||
2927 | |||
2928 | /* An end tag whose tag name is "dd", "dt", or "li" */ |
||
2929 | case 'dd': |
||
2930 | case 'dt': |
||
2931 | case 'li': |
||
2932 | /* If the stack of open elements has an element in scope |
||
2933 | whose tag name matches the tag name of the token, then |
||
2934 | generate implied end tags, except for elements with the |
||
2935 | same tag name as the token. */ |
||
2936 | if ($this->elementInScope($token['name'])) { |
||
2937 | $this->generateImpliedEndTags(array($token['name'])); |
||
2938 | |||
2939 | /* If the current node is not an element with the same |
||
2940 | tag name as the token, then this is a parse error. */ |
||
2941 | // w/e |
||
2942 | |||
2943 | /* If the stack of open elements has an element in scope |
||
2944 | whose tag name matches the tag name of the token, then |
||
2945 | pop elements from this stack until an element with that |
||
2946 | tag name has been popped from the stack. */ |
||
2947 | for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
2948 | if ($this->stack[$n]->nodeName === $token['name']) { |
||
2949 | $n = -1; |
||
2950 | } |
||
2951 | |||
2952 | array_pop($this->stack); |
||
2953 | } |
||
2954 | } |
||
2955 | break; |
||
2956 | |||
2957 | /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", |
||
2958 | "h5", "h6" */ |
||
2959 | case 'h1': |
||
2960 | case 'h2': |
||
2961 | case 'h3': |
||
2962 | case 'h4': |
||
2963 | case 'h5': |
||
2964 | case 'h6': |
||
2965 | $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); |
||
2966 | |||
2967 | /* If the stack of open elements has in scope an element whose |
||
2968 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then |
||
2969 | generate implied end tags. */ |
||
2970 | if ($this->elementInScope($elements)) { |
||
2971 | $this->generateImpliedEndTags(); |
||
2972 | |||
2973 | /* Now, if the current node is not an element with the same |
||
2974 | tag name as that of the token, then this is a parse error. */ |
||
2975 | // w/e |
||
2976 | |||
2977 | /* If the stack of open elements has in scope an element |
||
2978 | whose tag name is one of "h1", "h2", "h3", "h4", "h5", or |
||
2979 | "h6", then pop elements from the stack until an element |
||
2980 | with one of those tag names has been popped from the stack. */ |
||
2981 | while ($this->elementInScope($elements)) { |
||
2982 | array_pop($this->stack); |
||
2983 | } |
||
2984 | } |
||
2985 | break; |
||
2986 | |||
2987 | /* An end tag whose tag name is one of: "a", "b", "big", "em", |
||
2988 | "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ |
||
2989 | case 'a': |
||
2990 | case 'b': |
||
2991 | case 'big': |
||
2992 | case 'em': |
||
2993 | case 'font': |
||
2994 | case 'i': |
||
2995 | case 'nobr': |
||
2996 | case 's': |
||
2997 | case 'small': |
||
2998 | case 'strike': |
||
2999 | case 'strong': |
||
3000 | case 'tt': |
||
3001 | case 'u': |
||
3002 | /* 1. Let the formatting element be the last element in |
||
3003 | the list of active formatting elements that: |
||
3004 | * is between the end of the list and the last scope |
||
3005 | marker in the list, if any, or the start of the list |
||
3006 | otherwise, and |
||
3007 | * has the same tag name as the token. |
||
3008 | */ |
||
3009 | while (true) { |
||
3010 | for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) { |
||
3011 | if ($this->a_formatting[$a] === self::MARKER) { |
||
3012 | break; |
||
3013 | |||
3014 | } elseif ($this->a_formatting[$a]->tagName === $token['name']) { |
||
3015 | $formatting_element = $this->a_formatting[$a]; |
||
3016 | $in_stack = in_array($formatting_element, $this->stack, true); |
||
3017 | $fe_af_pos = $a; |
||
3018 | break; |
||
3019 | } |
||
3020 | } |
||
3021 | |||
3022 | /* If there is no such node, or, if that node is |
||
3023 | also in the stack of open elements but the element |
||
3024 | is not in scope, then this is a parse error. Abort |
||
3025 | these steps. The token is ignored. */ |
||
3026 | if (!isset($formatting_element) || ($in_stack && |
||
3027 | !$this->elementInScope($token['name'])) |
||
3028 | ) { |
||
3029 | break; |
||
3030 | |||
3031 | /* Otherwise, if there is such a node, but that node |
||
3032 | is not in the stack of open elements, then this is a |
||
3033 | parse error; remove the element from the list, and |
||
3034 | abort these steps. */ |
||
3035 | } elseif (isset($formatting_element) && !$in_stack) { |
||
3036 | unset($this->a_formatting[$fe_af_pos]); |
||
3037 | $this->a_formatting = array_merge($this->a_formatting); |
||
3038 | break; |
||
3039 | } |
||
3040 | |||
3041 | /* 2. Let the furthest block be the topmost node in the |
||
3042 | stack of open elements that is lower in the stack |
||
3043 | than the formatting element, and is not an element in |
||
3044 | the phrasing or formatting categories. There might |
||
3045 | not be one. */ |
||
3046 | $fe_s_pos = array_search($formatting_element, $this->stack, true); |
||
3047 | $length = count($this->stack); |
||
3048 | |||
3049 | for ($s = $fe_s_pos + 1; $s < $length; $s++) { |
||
3050 | $category = $this->getElementCategory($this->stack[$s]->nodeName); |
||
3051 | |||
3052 | if ($category !== self::PHRASING && $category !== self::FORMATTING) { |
||
3053 | $furthest_block = $this->stack[$s]; |
||
3054 | } |
||
3055 | } |
||
3056 | |||
3057 | /* 3. If there is no furthest block, then the UA must |
||
3058 | skip the subsequent steps and instead just pop all |
||
3059 | the nodes from the bottom of the stack of open |
||
3060 | elements, from the current node up to the formatting |
||
3061 | element, and remove the formatting element from the |
||
3062 | list of active formatting elements. */ |
||
3063 | if (!isset($furthest_block)) { |
||
3064 | for ($n = $length - 1; $n >= $fe_s_pos; $n--) { |
||
3065 | array_pop($this->stack); |
||
3066 | } |
||
3067 | |||
3068 | unset($this->a_formatting[$fe_af_pos]); |
||
3069 | $this->a_formatting = array_merge($this->a_formatting); |
||
3070 | break; |
||
3071 | } |
||
3072 | |||
3073 | /* 4. Let the common ancestor be the element |
||
3074 | immediately above the formatting element in the stack |
||
3075 | of open elements. */ |
||
3076 | $common_ancestor = $this->stack[$fe_s_pos - 1]; |
||
3077 | |||
3078 | /* 5. If the furthest block has a parent node, then |
||
3079 | remove the furthest block from its parent node. */ |
||
3080 | if ($furthest_block->parentNode !== null) { |
||
3081 | $furthest_block->parentNode->removeChild($furthest_block); |
||
3082 | } |
||
3083 | |||
3084 | /* 6. Let a bookmark note the position of the |
||
3085 | formatting element in the list of active formatting |
||
3086 | elements relative to the elements on either side |
||
3087 | of it in the list. */ |
||
3088 | $bookmark = $fe_af_pos; |
||
3089 | |||
3090 | /* 7. Let node and last node be the furthest block. |
||
3091 | Follow these steps: */ |
||
3092 | $node = $furthest_block; |
||
3093 | $last_node = $furthest_block; |
||
3094 | |||
3095 | while (true) { |
||
3096 | for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { |
||
3097 | /* 7.1 Let node be the element immediately |
||
3098 | prior to node in the stack of open elements. */ |
||
3099 | $node = $this->stack[$n]; |
||
3100 | |||
3101 | /* 7.2 If node is not in the list of active |
||
3102 | formatting elements, then remove node from |
||
3103 | the stack of open elements and then go back |
||
3104 | to step 1. */ |
||
3105 | if (!in_array($node, $this->a_formatting, true)) { |
||
3106 | unset($this->stack[$n]); |
||
3107 | $this->stack = array_merge($this->stack); |
||
3108 | |||
3109 | } else { |
||
3110 | break; |
||
3111 | } |
||
3112 | } |
||
3113 | |||
3114 | /* 7.3 Otherwise, if node is the formatting |
||
3115 | element, then go to the next step in the overall |
||
3116 | algorithm. */ |
||
3117 | if ($node === $formatting_element) { |
||
3118 | break; |
||
3119 | |||
3120 | /* 7.4 Otherwise, if last node is the furthest |
||
3121 | block, then move the aforementioned bookmark to |
||
3122 | be immediately after the node in the list of |
||
3123 | active formatting elements. */ |
||
3124 | } elseif ($last_node === $furthest_block) { |
||
3125 | $bookmark = array_search($node, $this->a_formatting, true) + 1; |
||
3126 | } |
||
3127 | |||
3128 | /* 7.5 If node has any children, perform a |
||
3129 | shallow clone of node, replace the entry for |
||
3130 | node in the list of active formatting elements |
||
3131 | with an entry for the clone, replace the entry |
||
3132 | for node in the stack of open elements with an |
||
3133 | entry for the clone, and let node be the clone. */ |
||
3134 | if ($node->hasChildNodes()) { |
||
3135 | $clone = $node->cloneNode(); |
||
3136 | $s_pos = array_search($node, $this->stack, true); |
||
3137 | $a_pos = array_search($node, $this->a_formatting, true); |
||
3138 | |||
3139 | $this->stack[$s_pos] = $clone; |
||
3140 | $this->a_formatting[$a_pos] = $clone; |
||
3141 | $node = $clone; |
||
3142 | } |
||
3143 | |||
3144 | /* 7.6 Insert last node into node, first removing |
||
3145 | it from its previous parent node if any. */ |
||
3146 | if ($last_node->parentNode !== null) { |
||
3147 | $last_node->parentNode->removeChild($last_node); |
||
3148 | } |
||
3149 | |||
3150 | $node->appendChild($last_node); |
||
3151 | |||
3152 | /* 7.7 Let last node be node. */ |
||
3153 | $last_node = $node; |
||
3154 | } |
||
3155 | |||
3156 | /* 8. Insert whatever last node ended up being in |
||
3157 | the previous step into the common ancestor node, |
||
3158 | first removing it from its previous parent node if |
||
3159 | any. */ |
||
3160 | if ($last_node->parentNode !== null) { |
||
3161 | $last_node->parentNode->removeChild($last_node); |
||
3162 | } |
||
3163 | |||
3164 | $common_ancestor->appendChild($last_node); |
||
3165 | |||
3166 | /* 9. Perform a shallow clone of the formatting |
||
3167 | element. */ |
||
3168 | $clone = $formatting_element->cloneNode(); |
||
3169 | |||
3170 | /* 10. Take all of the child nodes of the furthest |
||
3171 | block and append them to the clone created in the |
||
3172 | last step. */ |
||
3173 | while ($furthest_block->hasChildNodes()) { |
||
3174 | $child = $furthest_block->firstChild; |
||
3175 | $furthest_block->removeChild($child); |
||
3176 | $clone->appendChild($child); |
||
3177 | } |
||
3178 | |||
3179 | /* 11. Append that clone to the furthest block. */ |
||
3180 | $furthest_block->appendChild($clone); |
||
3181 | |||
3182 | /* 12. Remove the formatting element from the list |
||
3183 | of active formatting elements, and insert the clone |
||
3184 | into the list of active formatting elements at the |
||
3185 | position of the aforementioned bookmark. */ |
||
3186 | $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); |
||
3187 | unset($this->a_formatting[$fe_af_pos]); |
||
3188 | $this->a_formatting = array_merge($this->a_formatting); |
||
3189 | |||
3190 | $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); |
||
3191 | $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); |
||
3192 | $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); |
||
3193 | |||
3194 | /* 13. Remove the formatting element from the stack |
||
3195 | of open elements, and insert the clone into the stack |
||
3196 | of open elements immediately after (i.e. in a more |
||
3197 | deeply nested position than) the position of the |
||
3198 | furthest block in that stack. */ |
||
3199 | $fe_s_pos = array_search($formatting_element, $this->stack, true); |
||
3200 | $fb_s_pos = array_search($furthest_block, $this->stack, true); |
||
3201 | unset($this->stack[$fe_s_pos]); |
||
3202 | |||
3203 | $s_part1 = array_slice($this->stack, 0, $fb_s_pos); |
||
3204 | $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); |
||
3205 | $this->stack = array_merge($s_part1, array($clone), $s_part2); |
||
3206 | |||
3207 | /* 14. Jump back to step 1 in this series of steps. */ |
||
3208 | unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); |
||
3209 | } |
||
3210 | break; |
||
3211 | |||
3212 | /* An end tag token whose tag name is one of: "button", |
||
3213 | "marquee", "object" */ |
||
3214 | case 'button': |
||
3215 | case 'marquee': |
||
3216 | case 'object': |
||
3217 | /* If the stack of open elements has an element in scope whose |
||
3218 | tag name matches the tag name of the token, then generate implied |
||
3219 | end tags. */ |
||
3220 | if ($this->elementInScope($token['name'])) { |
||
3221 | $this->generateImpliedEndTags(); |
||
3222 | |||
3223 | /* Now, if the current node is not an element with the same |
||
3224 | tag name as the token, then this is a parse error. */ |
||
3225 | // k |
||
3226 | |||
3227 | /* Now, if the stack of open elements has an element in scope |
||
3228 | whose tag name matches the tag name of the token, then pop |
||
3229 | elements from the stack until that element has been popped from |
||
3230 | the stack, and clear the list of active formatting elements up |
||
3231 | to the last marker. */ |
||
3232 | for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
3233 | if ($this->stack[$n]->nodeName === $token['name']) { |
||
3234 | $n = -1; |
||
3235 | } |
||
3236 | |||
3237 | array_pop($this->stack); |
||
3238 | } |
||
3239 | |||
3240 | $marker = end(array_keys($this->a_formatting, self::MARKER, true)); |
||
3241 | |||
3242 | for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) { |
||
3243 | array_pop($this->a_formatting); |
||
3244 | } |
||
3245 | } |
||
3246 | break; |
||
3247 | |||
3248 | /* Or an end tag whose tag name is one of: "area", "basefont", |
||
3249 | "bgsound", "br", "embed", "hr", "iframe", "image", "img", |
||
3250 | "input", "isindex", "noembed", "noframes", "param", "select", |
||
3251 | "spacer", "table", "textarea", "wbr" */ |
||
3252 | case 'area': |
||
3253 | case 'basefont': |
||
3254 | case 'bgsound': |
||
3255 | case 'br': |
||
3256 | case 'embed': |
||
3257 | case 'hr': |
||
3258 | case 'iframe': |
||
3259 | case 'image': |
||
3260 | case 'img': |
||
3261 | case 'input': |
||
3262 | case 'isindex': |
||
3263 | case 'noembed': |
||
3264 | case 'noframes': |
||
3265 | case 'param': |
||
3266 | case 'select': |
||
3267 | case 'spacer': |
||
3268 | case 'table': |
||
3269 | case 'textarea': |
||
3270 | case 'wbr': |
||
3271 | // Parse error. Ignore the token. |
||
3272 | break; |
||
3273 | |||
3274 | /* An end tag token not covered by the previous entries */ |
||
3275 | default: |
||
3276 | for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
||
3277 | /* Initialise node to be the current node (the bottommost |
||
3278 | node of the stack). */ |
||
3279 | $node = end($this->stack); |
||
3280 | |||
3281 | /* If node has the same tag name as the end tag token, |
||
3282 | then: */ |
||
3283 | if ($token['name'] === $node->nodeName) { |
||
3284 | /* Generate implied end tags. */ |
||
3285 | $this->generateImpliedEndTags(); |
||
3286 | |||
3287 | /* If the tag name of the end tag token does not |
||
3288 | match the tag name of the current node, this is a |
||
3289 | parse error. */ |
||
3290 | // k |
||
3291 | |||
3292 | /* Pop all the nodes from the current node up to |
||
3293 | node, including node, then stop this algorithm. */ |
||
3294 | for ($x = count($this->stack) - $n; $x >= $n; $x--) { |
||
3295 | array_pop($this->stack); |
||
3296 | } |
||
3297 | |||
3298 | } else { |
||
3299 | $category = $this->getElementCategory($node); |
||
3300 | |||
3301 | if ($category !== self::SPECIAL && $category !== self::SCOPING) { |
||
3302 | /* Otherwise, if node is in neither the formatting |
||
3303 | category nor the phrasing category, then this is a |
||
3304 | parse error. Stop this algorithm. The end tag token |
||
3305 | is ignored. */ |
||
3306 | return false; |
||
3307 | } |
||
3308 | } |
||
3309 | } |
||
3310 | break; |
||
3311 | } |
||
3312 | break; |
||
3313 | } |
||
3314 | } |
||
3315 | |||
/**
 * Processes one token while the insertion mode is "in table".
 *
 * @param array $token Token to process. 'type' is always read; 'name' is
 *                     read for tag tokens and 'data' for character and
 *                     comment tokens.
 *
 * @return mixed false when a "table" start or end tag is ignored, the
 *               result of the handler a reprocessed token is handed to,
 *               or null otherwise.
 */
private function inTable($token)
{
    /* Element names passed to clearStackToTableContext() for the
    "clear the stack back to a table context" steps below. */
    $clear = array('html', 'table');

    $type = $token['type'];
    /* Character and comment tokens carry no tag name. */
    $name = isset($token['name']) ? $token['name'] : null;

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Append the character to the current node. */
        $text = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($text);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($comment);

    /* A start tag whose tag name is "caption" */
    } elseif ($type === HTML5::STARTTAG && $name === 'caption') {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert a marker at the end of the list of active
        formatting elements. */
        $this->a_formatting[] = self::MARKER;

        /* Insert an HTML element for the token, then switch the
        insertion mode to "in caption". */
        $this->insertElement($token);
        $this->mode = self::IN_CAPTION;

    /* A start tag whose tag name is "colgroup" */
    } elseif ($type === HTML5::STARTTAG && $name === 'colgroup') {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the
        insertion mode to "in column group". */
        $this->insertElement($token);
        $this->mode = self::IN_CGROUP;

    /* A start tag whose tag name is "col" */
    } elseif ($type === HTML5::STARTTAG && $name === 'col') {
        /* Act as if a start tag token with the tag name "colgroup" had
        been seen, then reprocess the current token. */
        $this->inTable(
            array(
                'name' => 'colgroup',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        $this->inColumnGroup($token);

    /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($type === HTML5::STARTTAG &&
        in_array($name, array('tbody', 'tfoot', 'thead'), true)
    ) {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in table body". */
        $this->insertElement($token);
        $this->mode = self::IN_TBODY;

    /* A start tag whose tag name is one of: "td", "th", "tr" */
    } elseif ($type === HTML5::STARTTAG &&
        in_array($name, array('td', 'th', 'tr'), true)
    ) {
        /* Act as if a start tag token with the tag name "tbody" had been
        seen, then reprocess the current token. */
        $this->inTable(
            array(
                'name' => 'tbody',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inTableBody($token);

    /* A start tag whose tag name is "table" */
    } elseif ($type === HTML5::STARTTAG && $name === 'table') {
        /* Parse error. Act as if an end tag token with the tag name "table"
        had been seen, then, if that token wasn't ignored, reprocess the
        current token. The end tag is ignored exactly when no table element
        is in table scope; reprocessing in that case would hand the same
        token back to this handler with nothing changed, so stop here. */
        if (!$this->elementInScope('table', true)) {
            return false;
        }

        $this->inTable(
            array(
                'name' => 'table',
                'type' => HTML5::ENDTAG
            )
        );

        return $this->mainPhase($token);

    /* An end tag whose tag name is "table" */
    } elseif ($type === HTML5::ENDTAG && $name === 'table') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if (!$this->elementInScope($name, true)) {
            return false;
        }

        /* Otherwise: generate implied end tags. */
        $this->generateImpliedEndTags();

        /* Now, if the current node is not a table element, then this
        is a parse error. (Not reported.) */

        /* Pop elements from this stack until a table element has been
        popped from the stack. */
        do {
            $popped = end($this->stack)->nodeName;
            array_pop($this->stack);
        } while ($popped !== 'table');

        /* Reset the insertion mode appropriately. */
        $this->resetInsertionMode();

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($type === HTML5::ENDTAG && in_array(
        $name,
        array(
            'body',
            'caption',
            'col',
            'colgroup',
            'html',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        ),
        true
    )
    ) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* Parse error. Process the token as if the insertion mode was "in
        body", with the following exception: */

        /* If the current node is a table, tbody, tfoot, thead, or tr
        element, then, whenever a node would be inserted into the current
        node, it must instead be inserted into the foster parent element. */
        if (in_array(
            end($this->stack)->nodeName,
            array('table', 'tbody', 'tfoot', 'thead', 'tr'),
            true
        )
        ) {
            /* Locate the last table element in the stack of open
            elements; $pos keeps its index when one is found. */
            $table = null;

            for ($pos = count($this->stack) - 1; $pos >= 0; $pos--) {
                if ($this->stack[$pos]->nodeName === 'table') {
                    $table = $this->stack[$pos];
                    break;
                }
            }

            if ($table === null) {
                /* No table element in the stack (innerHTML case): the
                foster parent is the first element in the stack of open
                elements (the html element). */
                $this->foster_parent = $this->stack[0];

            } elseif ($table->parentNode !== null &&
                $table->parentNode->nodeType === XML_ELEMENT_NODE
            ) {
                /* The table has a parent element: that is the foster
                parent. */
                $this->foster_parent = $table->parentNode;

            } else {
                /* The table has no parent, or its parent node is not an
                element: the foster parent is the element before the last
                table element in the stack of open elements. */
                $this->foster_parent = $this->stack[$pos - 1];
            }
        }

        $this->inBody($token);
    }
}
||
3527 | |||
/**
 * Processes one token while the insertion mode is "in caption".
 *
 * @param array $token Token to process; 'type' is always read and 'name'
 *                     is read for tag tokens.
 *
 * @return mixed false when a token that would close the caption is ignored
 *               because no caption is in table scope, the result of
 *               inTable() when the token is reprocessed there, or null
 *               otherwise.
 */
private function inCaption($token)
{
    $type = $token['type'];
    /* Character and comment tokens carry no tag name. */
    $name = isset($token['name']) ? $token['name'] : null;

    /* An end tag whose tag name is "caption" */
    if ($type === HTML5::ENDTAG && $name === 'caption') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if ($this->elementInScope($name, true)) {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* Now, if the current node is not a caption element, then this
            is a parse error. (Not reported.) */

            /* Pop elements from this stack until a caption element has
            been popped from the stack. */
            do {
                $popped = end($this->stack)->nodeName;
                array_pop($this->stack);
            } while ($popped !== 'caption');

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in table". */
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
    name is "table" */
    } elseif (($type === HTML5::STARTTAG && in_array(
        $name,
        array(
            'caption',
            'col',
            'colgroup',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        ),
        true
    )) || ($type === HTML5::ENDTAG && $name === 'table')
    ) {
        /* Parse error. Act as if an end tag with the tag name "caption"
        had been seen, then, if that token wasn't ignored, reprocess the
        current token. The end tag is ignored exactly when no caption is
        in table scope, in which case this token is dropped as well. */
        if (!$this->elementInScope('caption', true)) {
            return false;
        }

        $this->inCaption(
            array(
                'name' => 'caption',
                'type' => HTML5::ENDTAG
            )
        );

        return $this->inTable($token);

    /* An end tag whose tag name is one of: "body", "col", "colgroup",
    "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($type === HTML5::ENDTAG && in_array(
        $name,
        array(
            'body',
            'col',
            'colgroup',
            'html',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        ),
        true
    )
    ) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
||
3622 | |||
/**
 * Processes one token while the insertion mode is "in column group".
 *
 * @param array $token Token to process. 'type' is always read; 'name' is
 *                     read for tag tokens and 'data' for character and
 *                     comment tokens.
 *
 * @return mixed false when a token is dropped because the implied
 *               "colgroup" end tag would be ignored, the result of
 *               inTable() when the token is reprocessed there, or null
 *               otherwise.
 */
private function inColumnGroup($token)
{
    $type = $token['type'];
    /* Character and comment tokens carry no tag name. */
    $name = isset($token['name']) ? $token['name'] : null;

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Append the character to the current node. */
        $text = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($text);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($comment);

    /* A start tag whose tag name is "col" */
    } elseif ($type === HTML5::STARTTAG && $name === 'col') {
        /* Insert a col element for the token. Immediately pop the current
        node off the stack of open elements. */
        $this->insertElement($token);
        array_pop($this->stack);

    /* An end tag whose tag name is "colgroup" */
    } elseif ($type === HTML5::ENDTAG && $name === 'colgroup') {
        /* If the current node is the root html element, then this is a
        parse error, ignore the token. (innerHTML case) Otherwise, pop the
        current node (which will be a colgroup element) from the stack of
        open elements and switch the insertion mode to "in table". */
        if (end($this->stack)->nodeName !== 'html') {
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* An end tag whose tag name is "col" */
    } elseif ($type === HTML5::ENDTAG && $name === 'col') {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Act as if an end tag with the tag name "colgroup" had been seen,
        and then, if that token wasn't ignored, reprocess the current token.
        The end tag is ignored exactly when the current node is the html
        element, so in that case this token is dropped as well. */
        if (end($this->stack)->nodeName === 'html') {
            return false;
        }

        $this->inColumnGroup(
            array(
                'name' => 'colgroup',
                'type' => HTML5::ENDTAG
            )
        );

        return $this->inTable($token);
    }
}
||
3684 | |||
/**
 * Processes one token while the insertion mode is "in table body".
 *
 * @param array $token Token to process; 'type' is always read and 'name'
 *                     is read for tag tokens.
 *
 * @return mixed The result of the handler a reprocessed token is handed
 *               to, or null when the token is handled or ignored here.
 */
private function inTableBody($token)
{
    /* Element names passed to clearStackToTableContext() for the
    "clear the stack back to a table body context" steps below. */
    $clear = array('tbody', 'tfoot', 'thead', 'html');

    $type = $token['type'];
    /* Character and comment tokens carry no tag name. */
    $name = isset($token['name']) ? $token['name'] : null;

    /* A start tag whose tag name is "tr" */
    if ($type === HTML5::STARTTAG && $name === 'tr') {
        /* Clear the stack back to a table body context. */
        $this->clearStackToTableContext($clear);

        /* Insert a tr element for the token, then switch the insertion
        mode to "in row". */
        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    /* A start tag whose tag name is one of: "th", "td" */
    } elseif ($type === HTML5::STARTTAG &&
        ($name === 'th' || $name === 'td')
    ) {
        /* Parse error. Act as if a start tag with the tag name "tr" had
        been seen, then reprocess the current token. */
        $this->inTableBody(
            array(
                'name' => 'tr',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inRow($token);

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($type === HTML5::ENDTAG &&
        in_array($name, array('tbody', 'tfoot', 'thead'), true)
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if ($this->elementInScope($name, true)) {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node from the stack of open elements. Switch
            the insertion mode to "in table". */
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
    } elseif (($type === HTML5::STARTTAG && in_array(
        $name,
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'),
        true
    )) ||
        ($type === HTML5::ENDTAG && $name === 'table')
    ) {
        /* If the stack of open elements does not have a tbody, thead, or
        tfoot element in table scope, this is a parse error. Ignore the
        token. (innerHTML case) */
        if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Act as if an end tag with the same tag name as the current
            node ("tbody", "tfoot", or "thead") had been seen, then
            reprocess the current token. */
            $this->inTableBody(
                array(
                    'name' => end($this->stack)->nodeName,
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->mainPhase($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th", "tr" */
    } elseif ($type === HTML5::ENDTAG && in_array(
        $name,
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'),
        true
    )
    ) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
||
3783 | |||
/**
 * Processes one token while the insertion mode is "in row".
 *
 * @param array $token Token to process; 'type' is always read and 'name'
 *                     is read for tag tokens.
 *
 * @return mixed false when a token is dropped because the implied "tr"
 *               end tag would be ignored, the result of inTableBody() when
 *               the token is reprocessed there, or null otherwise.
 */
private function inRow($token)
{
    /* Element names passed to clearStackToTableContext() for the
    "clear the stack back to a table row context" steps below. */
    $clear = array('tr', 'html');

    $type = $token['type'];
    /* Character and comment tokens carry no tag name. */
    $name = isset($token['name']) ? $token['name'] : null;

    /* A start tag whose tag name is one of: "th", "td" */
    if ($type === HTML5::STARTTAG &&
        ($name === 'th' || $name === 'td')
    ) {
        /* Clear the stack back to a table row context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in cell". */
        $this->insertElement($token);
        $this->mode = self::IN_CELL;

        /* Insert a marker at the end of the list of active formatting
        elements. */
        $this->a_formatting[] = self::MARKER;

    /* An end tag whose tag name is "tr" */
    } elseif ($type === HTML5::ENDTAG && $name === 'tr') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if ($this->elementInScope($name, true)) {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node (which will be a tr element) from the
            stack of open elements. Switch the insertion mode to "in table
            body". */
            array_pop($this->stack);
            $this->mode = self::IN_TBODY;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
    } elseif (($type === HTML5::STARTTAG && in_array(
        $name,
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'),
        true
    )) || ($type === HTML5::ENDTAG && $name === 'table')
    ) {
        /* Act as if an end tag with the tag name "tr" had been seen, then,
        if that token wasn't ignored, reprocess the current token. The end
        tag is ignored exactly when no tr element is in table scope, in
        which case this token is dropped as well. */
        if (!$this->elementInScope('tr', true)) {
            return false;
        }

        $this->inRow(
            array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            )
        );

        /* The implied end tag switched the insertion mode to "in table
        body", so that handler reprocesses the token. */
        return $this->inTableBody($token);

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($type === HTML5::ENDTAG &&
        in_array($name, array('tbody', 'tfoot', 'thead'), true)
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if ($this->elementInScope($name, true)) {
            /* Otherwise, act as if an end tag with the tag name "tr" had
            been seen, then reprocess the current token in the table body
            handler, which owns these end tags. */
            $this->inRow(
                array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->inTableBody($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th" */
    } elseif ($type === HTML5::ENDTAG && in_array(
        $name,
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'),
        true
    )
    ) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
||
3881 | |||
/**
 * Processes one token while the insertion mode is "in cell".
 *
 * @param array $token Token to process; 'type' is always read and 'name'
 *                     is read for tag tokens.
 *
 * @return mixed The result of inRow() when the cell is closed and the
 *               token is reprocessed there, or null when the token is
 *               handled or ignored here.
 */
private function inCell($token)
{
    $type = $token['type'];
    /* Character and comment tokens carry no tag name. */
    $name = isset($token['name']) ? $token['name'] : null;

    /* An end tag whose tag name is one of: "td", "th" */
    if ($type === HTML5::ENDTAG &&
        ($name === 'td' || $name === 'th')
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token, then this is a
        parse error and the token must be ignored. */
        if ($this->elementInScope($name, true)) {
            /* Generate implied end tags, except for elements with the same
            tag name as the token. */
            $this->generateImpliedEndTags(array($name));

            /* Now, if the current node is not an element with the same tag
            name as the token, then this is a parse error. (Not reported.) */

            /* Pop elements from this stack until an element with the same
            tag name as the token has been popped from the stack. */
            do {
                $popped = end($this->stack)->nodeName;
                array_pop($this->stack);
            } while ($popped !== $name);

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in row". (The current node
            will be a tr element at this point.) */
            $this->mode = self::IN_ROW;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($type === HTML5::STARTTAG && in_array(
        $name,
        array(
            'caption',
            'col',
            'colgroup',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        ),
        true
    )
    ) {
        /* If the stack of open elements does not have a td or th element
        in table scope, then this is a parse error; ignore the token.
        (innerHTML case) Otherwise, close the cell and reprocess the
        current token. */
        if ($this->elementInScope(array('td', 'th'), true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html" */
    } elseif ($type === HTML5::ENDTAG && in_array(
        $name,
        array('body', 'caption', 'col', 'colgroup', 'html'),
        true
    )
    ) {
        /* Parse error. Ignore the token. */

    /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
    "thead", "tr" */
    } elseif ($type === HTML5::ENDTAG && in_array(
        $name,
        array('table', 'tbody', 'tfoot', 'thead', 'tr'),
        true
    )
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token (which can only
        happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
        then this is a parse error and the token must be ignored.
        Otherwise, close the cell and reprocess the current token. */
        if ($this->elementInScope($name, true)) {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
4020 | |||
/**
 * Processes one token while the insertion mode is "in select".
 *
 * Reads $token['type'] for every token, $token['data'] for character and
 * comment tokens, and $token['name'] for start/end tag tokens. The type is
 * always tested before the name so that tokens without a 'name' key are
 * never indexed by it.
 *
 * @param array $token Token emitted by the tokeniser.
 */
private function inSelect($token)
{
    $type = $token['type'];
    $is_start = ($type === HTML5::STARTTAG);
    $is_end = ($type === HTML5::ENDTAG);

    /* A character token */
    if ($type === HTML5::CHARACTR) {
        /* Append the token's character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* A start tag token whose tag name is "option" */
    } elseif ($is_start && $token['name'] === 'option') {
        /* If the current node is an option element, act as if an end tag
        with the tag name "option" had been seen. */
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect(
                array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        /* Insert an HTML element for the token. */
        $this->insertElement($token);

    /* A start tag token whose tag name is "optgroup" */
    } elseif ($is_start && $token['name'] === 'optgroup') {
        /* If the current node is an option element, act as if an end tag
        with the tag name "option" had been seen. */
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect(
                array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        /* If the current node is an optgroup element, act as if an end tag
        with the tag name "optgroup" had been seen. */
        if (end($this->stack)->nodeName === 'optgroup') {
            $this->inSelect(
                array(
                    'name' => 'optgroup',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        /* Insert an HTML element for the token. */
        $this->insertElement($token);

    /* An end tag token whose tag name is "optgroup" */
    } elseif ($is_end && $token['name'] === 'optgroup') {
        /* First, if the current node is an option element, and the node
        immediately before it in the stack of open elements is an optgroup
        element, then act as if an end tag with the tag name "option" had
        been seen. */
        $depth = count($this->stack);

        // The depth guard keeps the "node before it" lookup inside the stack.
        if ($depth >= 2 &&
            $this->stack[$depth - 1]->nodeName === 'option' &&
            $this->stack[$depth - 2]->nodeName === 'optgroup'
        ) {
            $this->inSelect(
                array(
                    'name' => 'option',
                    'type' => HTML5::ENDTAG
                )
            );
        }

        /* If the current node is an optgroup element, then pop that node
        from the stack of open elements. Otherwise, this is a parse error,
        ignore the token. */
        // Re-read the current node: the implied </option> above may have
        // popped the stack, and the comparison must be on the node's name,
        // not on the node object itself.
        $current = end($this->stack);

        if ($current !== false && $current->nodeName === 'optgroup') {
            array_pop($this->stack);
        }

    /* An end tag token whose tag name is "option" */
    } elseif ($is_end && $token['name'] === 'option') {
        /* If the current node is an option element, then pop that node
        from the stack of open elements. Otherwise, this is a parse error,
        ignore the token. */
        if (end($this->stack)->nodeName === 'option') {
            array_pop($this->stack);
        }

    /* An end tag whose tag name is "select" */
    } elseif ($is_end && $token['name'] === 'select') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if (!$this->elementInScope($token['name'], true)) {
            // w/e

        /* Otherwise: */
        } else {
            /* Pop elements from the stack of open elements until a select
            element has been popped from the stack. */
            while (true) {
                $current = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($current === 'select') {
                    break;
                }
            }

            /* Reset the insertion mode appropriately. */
            $this->resetInsertionMode();
        }

    /* A start tag whose tag name is "select" */
    } elseif ($is_start && $token['name'] === 'select') {
        /* Parse error. Act as if the token had been an end tag with the
        tag name "select" instead. */
        $this->inSelect(
            array(
                'name' => 'select',
                'type' => HTML5::ENDTAG
            )
        );

    /* An end tag whose tag name is one of: "caption", "table", "tbody",
    "tfoot", "thead", "tr", "td", "th" */
    } elseif ($is_end && in_array(
        $token['name'],
        array(
            'caption',
            'table',
            'tbody',
            'tfoot',
            'thead',
            'tr',
            'td',
            'th'
        ),
        true
    )) {
        /* Parse error. */
        // w/e

        /* If the stack of open elements has an element in table scope with
        the same tag name as that of the token, then act as if an end tag
        with the tag name "select" had been seen, and reprocess the token.
        Otherwise, ignore the token. */
        if ($this->elementInScope($token['name'], true)) {
            $this->inSelect(
                array(
                    'name' => 'select',
                    'type' => HTML5::ENDTAG
                )
            );

            $this->mainPhase($token);
        }

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}
||
4201 | |||
/**
 * Processes one token while the insertion mode is "after body".
 *
 * @param array $token Token emitted by the tokeniser.
 *
 * @return mixed The result of inBody() when the token falls through to the
 *               "anything else" case; null for every other token.
 */
private function afterBody($token)
{
    $type = $token['type'];

    /* Character data made up solely of U+0009, U+000A, U+000B, U+000C or
    U+0020 is processed the way the "in body" insertion mode would. */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->inBody($token);
        return;
    }

    /* A comment token becomes a Comment node appended to the first element
    in the stack of open elements (the html element). */
    if ($type === HTML5::COMMENT) {
        $node = $this->dom->createComment($token['data']);
        $this->stack[0]->appendChild($node);
        return;
    }

    /* An end tag named "html" switches to the trailing end phase. (The
    innerHTML case, where this token would be ignored as a parse error,
    is not distinguished here.) */
    if ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        $this->phase = self::END_PHASE;
        return;
    }

    /* Anything else: parse error. Set the insertion mode to "in body" and
    reprocess the token. */
    $this->mode = self::IN_BODY;

    return $this->inBody($token);
}
||
4242 | |||
/**
 * Processes one token while the insertion mode is "in frameset".
 *
 * The token type is tested before $token['name'] is read, so tokens that
 * carry no 'name' key (for example non-whitespace character data) fall
 * through to the "anything else" case without an undefined-key lookup.
 *
 * @param array $token Token emitted by the tokeniser.
 */
private function inFrameset($token)
{
    $type = $token['type'];
    $is_start = ($type === HTML5::STARTTAG);
    $is_end = ($type === HTML5::ENDTAG);

    /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
    // NOTE(review): the pattern below does not match U+000D; presumably CR
    // is normalised away before tokens reach this method - confirm.
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* A start tag with the tag name "frameset" */
    } elseif ($is_start && $token['name'] === 'frameset') {
        $this->insertElement($token);

    /* An end tag with the tag name "frameset" */
    } elseif ($is_end && $token['name'] === 'frameset') {
        /* If the current node is the root html element, then this is a
        parse error; ignore the token. (innerHTML case) */
        if (end($this->stack)->nodeName === 'html') {
            // Ignore

        } else {
            /* Otherwise, pop the current node from the stack of open
            elements. */
            array_pop($this->stack);

            /* If the parser was not originally created in order to handle
            the setting of an element's innerHTML attribute (innerHTML case),
            and the current node is no longer a frameset element, then change
            the insertion mode to "after frameset". */
            // Closing an inner frameset of a nested pair must leave the
            // parser in this mode so the outer frameset's remaining
            // children are still accepted.
            $current = end($this->stack);

            if ($current === false || $current->nodeName !== 'frameset') {
                $this->mode = self::AFTR_FRAME;
            }
        }

    /* A start tag with the tag name "frame" */
    } elseif ($is_start && $token['name'] === 'frame') {
        /* Insert an HTML element for the token. */
        $this->insertElement($token);

        /* Immediately pop the current node off the stack of open elements. */
        array_pop($this->stack);

    /* A start tag with the tag name "noframes" */
    } elseif ($is_start && $token['name'] === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}
||
4311 | |||
/**
 * Processes one token while the insertion mode is "after frameset".
 *
 * The token type is tested before $token['name'] is read, so tokens that
 * carry no 'name' key reach the "anything else" case without an
 * undefined-key lookup.
 *
 * @param array $token Token emitted by the tokeniser.
 */
private function afterFrameset($token)
{
    $type = $token['type'];

    /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
    // NOTE(review): the pattern below does not match U+000D; presumably CR
    // is normalised away before tokens reach this method - confirm.
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag with the tag name "html" */
    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        /* Switch to the trailing end phase. */
        $this->phase = self::END_PHASE;

    /* A start tag with the tag name "noframes" */
    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}
||
4350 | |||
/**
 * Processes one token during the trailing end phase, i.e. after the main
 * phase has finished.
 *
 * @param array $token Token emitted by the tokeniser.
 *
 * @return mixed The result of mainPhase() when the token forces a switch
 *               back to the main phase; null for every other token.
 */
private function trailingEndPhase($token)
{
    /* After the main phase, as each token is emitted from the tokenisation
    stage, it must be processed as described in this section. */

    /* A DOCTYPE token */
    if ($token['type'] === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.

    /* A comment token */
    } elseif ($token['type'] === HTML5::COMMENT) {
        /* Append a Comment node to the Document object with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);

    /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    } elseif ($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Process the token as it would be processed in the main phase. */
        $this->mainPhase($token);

    /* A character token that is not one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE. Or a start tag token. Or an end tag token. */
    // Whitespace-only character tokens were consumed by the branch above,
    // so any character token arriving here contains non-whitespace data.
    } elseif ($token['type'] === HTML5::CHARACTR ||
        $token['type'] === HTML5::STARTTAG ||
        $token['type'] === HTML5::ENDTAG
    ) {
        /* Parse error. Switch back to the main phase and reprocess the
        token. */
        $this->phase = self::MAIN_PHASE;
        return $this->mainPhase($token);

    /* An end-of-file token */
    } elseif ($token['type'] === HTML5::EOF) {
        /* OMG DONE!! */
    }
}
||
4393 | |||
/**
 * Creates an element for a tag token, attaches it through
 * appendToRealParent() and pushes it onto the stack of open elements.
 *
 * @param array $token  Tag token; its 'name' and 'attr' entries are read.
 * @param bool  $append Not read by this method.
 * @param bool  $check  When true, the tag name is reduced to a form made of
 *                      ASCII letters, digits and hyphens that does not
 *                      start with a hyphen or digit before the element is
 *                      created.
 *
 * @return DOMElement The newly created element.
 */
private function insertElement($token, $append = true, $check = false)
{
    $tag_name = $token['name'];

    // Proprietary workaround for libxml2's limitations with tag names.
    if ($check) {
        // Keep only ASCII letters, digits and hyphens...
        $tag_name = preg_replace('/[^a-z0-9-]/i', '', $tag_name);
        // ...then drop any leading hyphens and digits.
        $tag_name = ltrim($tag_name, '-0..9');

        // Should never be needed in theory; "span" is an arbitrary
        // generic fallback when nothing usable is left.
        if ($tag_name === '') {
            $tag_name = 'span';
        }
    }

    $element = $this->dom->createElement($tag_name);

    // The first occurrence of an attribute name wins; repeats are skipped.
    foreach ($token['attr'] as $attribute) {
        if ($element->hasAttribute($attribute['name'])) {
            continue;
        }

        $element->setAttribute($attribute['name'], (string)$attribute['value']);
    }

    $this->appendToRealParent($element);
    $this->stack[] = $element;

    return $element;
}
||
4422 | |||
/**
 * Wraps character data in a text node and attaches it through
 * appendToRealParent().
 *
 * @param string $data Character data for the new text node.
 */
private function insertText($data)
{
    $this->appendToRealParent($this->dom->createTextNode($data));
}
||
4428 | |||
/**
 * Wraps comment data in a Comment node and attaches it through
 * appendToRealParent().
 *
 * @param string $data Contents of the new comment.
 */
private function insertComment($data)
{
    $this->appendToRealParent($this->dom->createComment($data));
}
||
4434 | |||
/**
 * Attaches a node either to the current node or, when a foster parent is
 * pending, to that foster parent. The pending foster parent is cleared
 * once it has been used.
 *
 * @param DOMNode $node Node to insert into the document.
 */
private function appendToRealParent($node)
{
    // No foster parent pending: plain append to the current node.
    if ($this->foster_parent === null) {
        end($this->stack)->appendChild($node);
        return;
    }

    /* If the foster parent element is the parent element of the last table
    element in the stack of open elements, then the new node must be
    inserted immediately before that table element in the foster parent
    element; otherwise, the new node must be appended to the foster parent
    element. */
    $last_table = null;
    $index = count($this->stack);

    // Walk from the current node towards the root, looking for the nearest
    // table element that is attached to a parent.
    while ($index-- > 0) {
        $candidate = $this->stack[$index];

        if ($candidate->nodeName === 'table' && $candidate->parentNode !== null) {
            $last_table = $candidate;
            break;
        }
    }

    if ($last_table !== null &&
        $this->foster_parent->isSameNode($last_table->parentNode)
    ) {
        $this->foster_parent->insertBefore($node, $last_table);
    } else {
        $this->foster_parent->appendChild($node);
    }

    $this->foster_parent = null;
}
||
4465 | |||
/**
 * Implements the "has an element in scope" / "has an element in table
 * scope" check against the stack of open elements.
 *
 * @param string|array $el    Tag name to look for, or a list of tag names;
 *                            with a list, the result is true as soon as any
 *                            one of them is in scope.
 * @param bool         $table True selects the "in table scope" variant,
 *                            which is bounded only by table and the root
 *                            element; otherwise the regular variant is
 *                            used, which is additionally bounded by
 *                            caption, td, th, button, marquee and object.
 *
 * @return bool True when a matching element is in scope, false otherwise.
 */
private function elementInScope($el, $table = false)
{
    if (is_array($el)) {
        foreach ($el as $element) {
            if ($this->elementInScope($element, $table)) {
                return true;
            }
        }

        return false;
    }

    /* 1. Initialise node to be the current node (the bottommost node of
    the stack). */
    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        $node = $this->stack[$n];

        if ($node->tagName === $el) {
            /* 2. If node is the target node, terminate in a match state. */
            return true;

        } elseif ($node->tagName === 'table') {
            /* 3. Otherwise, if node is a table element, terminate in a failure
            state. */
            return false;

        } elseif ($table !== true && in_array(
            $node->tagName,
            array(
                'caption',
                'td',
                'th',
                'button',
                'marquee',
                'object'
            ),
            true
        )) {
            /* 4. Otherwise, if the algorithm is the "has an element in scope"
            variant (rather than the "has an element in table scope" variant),
            and node is one of the following, terminate in a failure state. */
            // This barrier belongs to the regular variant only; the table
            // scope variant must keep walking past these elements.
            return false;

        } elseif ($node === $node->ownerDocument->documentElement) {
            /* 5. Otherwise, if node is an html element (root element), terminate
            in a failure state. (This can only happen if the node is the topmost
            node of the stack of open elements, and prevents the next step from
            being invoked if there are no more elements in the stack.) */
            return false;
        }

        /* Otherwise, set node to the previous entry in the stack of open
        elements and return to step 2. */
    }

    // Reached only when the stack is empty or never hits the root element;
    // report "not in scope" explicitly instead of returning null.
    return false;
}
||
4525 | |||
/**
 * Reconstructs the active formatting elements: every entry after the last
 * marker / last still-open element in the list of active formatting
 * elements is shallow-cloned, appended to the current node, pushed onto the
 * stack of open elements and swapped into the list in place of the
 * original entry.
 *
 * @return false|null False when there was nothing to reconstruct; null
 *                    after elements have been reconstructed.
 */
private function reconstructActiveFormattingElements()
{
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $formatting_elements = count($this->a_formatting);

    if ($formatting_elements === 0) {
        return false;
    }

    /* 3. Let entry be the last (most recently added) element in the list
    of active formatting elements. */
    $entry = end($this->a_formatting);

    /* 2. If the last (most recently added) entry in the list of active
    formatting elements is a marker, or if it is an element that is in the
    stack of open elements, then there is nothing to reconstruct; stop this
    algorithm. */
    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    // Whether step 7 (advance to the next entry) must run before cloning.
    // It is skipped only when the rewind below runs off the front of the
    // list, in which case the algorithm jumps straight to step 8.
    $advance = true;
    $a = $formatting_elements - 1;

    while (true) {
        /* 4. If there are no entries before entry in the list of active
        formatting elements, then jump to step 8. */
        if ($a === 0) {
            $advance = false;
            break;
        }

        /* 5. Let entry be the entry one earlier than entry in the list of
        active formatting elements. */
        $a--;
        $entry = $this->a_formatting[$a];

        /* 6. If entry is neither a marker nor an element that is also in
        the stack of open elements, go to step 4. */
        // Otherwise stop rewinding: this entry must not be cloned, so the
        // loop below starts by advancing past it (step 7).
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            break;
        }
    }

    while (true) {
        /* 7. Let entry be the element one later than entry in the list of
        active formatting elements. */
        if ($advance) {
            $a++;
            $entry = $this->a_formatting[$a];
        }

        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $entry->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        end($this->stack)->appendChild($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;

        /* 11. If the entry for clone in the list of active formatting
        elements is not the last entry in the list, return to step 7. */
        if (end($this->a_formatting) === $clone) {
            break;
        }

        $advance = true;
    }
}
||
4597 | |||
/**
 * Clears the list of active formatting elements up to and including the
 * last marker. When the list holds no marker, the whole list is emptied.
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker()
{
    /* When the steps below require the UA to clear the list of active
    formatting elements up to the last marker, the UA must perform the
    following steps: */

    // Stopping on an empty list keeps the loop from spinning forever when
    // no marker is present.
    while (!empty($this->a_formatting)) {
        /* 1. Let entry be the last (most recently added) entry in the list
        of active formatting elements.
        2. Remove entry from the list of active formatting elements. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
||
4619 | |||
/**
 * Generates implied end tags by popping the stack of open elements while
 * the current node is a dd, dt, li, p, td, th or tr element.
 *
 * @param array $exclude Tag names from that list which must NOT be closed
 *                       implicitly.
 */
private function generateImpliedEndTags($exclude = array())
{
    /* When the steps below require the UA to generate implied end tags,
    then, if the current node is a dd element, a dt element, an li element,
    a p element, a td element, a th element, or a tr element, the UA must
    act as if an end tag with the respective tag name had been seen and
    then generate implied end tags again. */
    $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    // end() yields false once the stack is empty; stop there instead of
    // reading a property of a non-object.
    while (($node = end($this->stack)) !== false &&
        in_array($node->nodeName, $elements, true)
    ) {
        array_pop($this->stack);
    }
}
||
4634 | |||
4635 | private function getElementCategory($node) |
||
4646 | } |
||
4647 | } |
||
4648 | |||
4649 | private function clearStackToTableContext($elements) |
||
4663 | } |
||
4664 | } |
||
4665 | } |
||
4666 | |||
/**
 * Resets the insertion mode appropriately: walks the stack of open
 * elements from the current node towards the root and sets the insertion
 * mode according to the first element that determines one.
 */
private function resetInsertionMode()
{
    /* Elements whose presence fixes the insertion mode outright (steps 4
    to 13 of the algorithm). A head element maps to "in body" - not
    "in head" - on purpose. */
    $mode_for = array(
        'select'   => self::IN_SELECT,
        'td'       => self::IN_CELL,
        'th'       => self::IN_CELL,
        'tr'       => self::IN_ROW,
        'tbody'    => self::IN_TBODY,
        'thead'    => self::IN_TBODY,
        'tfoot'    => self::IN_TBODY,
        'caption'  => self::IN_CAPTION,
        'colgroup' => self::IN_CGROUP,
        'table'    => self::IN_TABLE,
        'head'     => self::IN_BODY,
        'body'     => self::IN_BODY,
        'frameset' => self::IN_FRAME,
    );

    /* 1. Let last be false. */
    $reached_first = false;
    $index = count($this->stack);

    while ($index-- > 0) {
        /* 2. Let node be the last node in the stack of open elements (and,
        on later passes, the entry before the previous node). */
        $node = $this->stack[$index];

        /* 3. If node is the first node in the stack of open elements, then
        set last to true. (The innerHTML context-element substitution is
        not performed here.) */
        if ($this->stack[0]->isSameNode($node)) {
            $reached_first = true;
        }

        $name = $node->nodeName;

        /* 4-13. A node listed in the table above decides the mode. */
        if (array_key_exists($name, $mode_for)) {
            $this->mode = $mode_for[$name];
            return;
        }

        /* 14. If node is an html element, then: if the head element
        pointer is null, switch the insertion mode to "before head",
        otherwise, switch the insertion mode to "after head". */
        if ($name === 'html') {
            if ($this->head_pointer === null) {
                $this->mode = self::BEFOR_HEAD;
            } else {
                $this->mode = self::AFTER_HEAD;
            }

            return;
        }

        /* 15. If last is true, then set the insertion mode to "in body"
        and abort these steps. (innerHTML case) */
        if ($reached_first) {
            $this->mode = self::IN_BODY;
            return;
        }
    }
}
||
4765 | |||
4766 | private function closeCell() |
||
4780 | } |
||
4781 | } |
||
4782 | } |
||
4783 | |||
/**
 * Returns the document held in this tree constructor's $dom property.
 *
 * @return mixed The current value of $this->dom.
 */
public function save()
{
    $document = $this->dom;

    return $document;
}
||
4788 | } |
||
4789 |