@@ -385,8 +385,7 @@ |
||
| 385 | 385 | * separate lists for processing. Format is element[attr1|attr2],element2... |
| 386 | 386 | * @warning Although it's largely drawn from TinyMCE's implementation, |
| 387 | 387 | * it is different, and you'll probably have to modify your lists |
| 388 | - * @param $list String list to parse |
|
| 389 | - * @param array($allowed_elements, $allowed_attributes) |
|
| 388 | + * @param string $list String list to parse |
|
| 390 | 389 | * @todo Give this its own class, probably static interface |
| 391 | 390 | */ |
| 392 | 391 | public function parseTinyMCEAllowedList($list) { |
@@ -26,398 +26,398 @@ |
||
| 26 | 26 | class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition |
| 27 | 27 | { |
| 28 | 28 | |
| 29 | - // FULLY-PUBLIC VARIABLES --------------------------------------------- |
|
| 30 | - |
|
| 31 | - /** |
|
| 32 | - * Associative array of element names to HTMLPurifier_ElementDef |
|
| 33 | - */ |
|
| 34 | - public $info = array(); |
|
| 35 | - |
|
| 36 | - /** |
|
| 37 | - * Associative array of global attribute name to attribute definition. |
|
| 38 | - */ |
|
| 39 | - public $info_global_attr = array(); |
|
| 40 | - |
|
| 41 | - /** |
|
| 42 | - * String name of parent element HTML will be going into. |
|
| 43 | - */ |
|
| 44 | - public $info_parent = 'div'; |
|
| 45 | - |
|
| 46 | - /** |
|
| 47 | - * Definition for parent element, allows parent element to be a |
|
| 48 | - * tag that's not allowed inside the HTML fragment. |
|
| 49 | - */ |
|
| 50 | - public $info_parent_def; |
|
| 51 | - |
|
| 52 | - /** |
|
| 53 | - * String name of element used to wrap inline elements in block context |
|
| 54 | - * @note This is rarely used except for BLOCKQUOTEs in strict mode |
|
| 55 | - */ |
|
| 56 | - public $info_block_wrapper = 'p'; |
|
| 57 | - |
|
| 58 | - /** |
|
| 59 | - * Associative array of deprecated tag name to HTMLPurifier_TagTransform |
|
| 60 | - */ |
|
| 61 | - public $info_tag_transform = array(); |
|
| 62 | - |
|
| 63 | - /** |
|
| 64 | - * Indexed list of HTMLPurifier_AttrTransform to be performed before validation. |
|
| 65 | - */ |
|
| 66 | - public $info_attr_transform_pre = array(); |
|
| 67 | - |
|
| 68 | - /** |
|
| 69 | - * Indexed list of HTMLPurifier_AttrTransform to be performed after validation. |
|
| 70 | - */ |
|
| 71 | - public $info_attr_transform_post = array(); |
|
| 72 | - |
|
| 73 | - /** |
|
| 74 | - * Nested lookup array of content set name (Block, Inline) to |
|
| 75 | - * element name to whether or not it belongs in that content set. |
|
| 76 | - */ |
|
| 77 | - public $info_content_sets = array(); |
|
| 78 | - |
|
| 79 | - /** |
|
| 80 | - * Indexed list of HTMLPurifier_Injector to be used. |
|
| 81 | - */ |
|
| 82 | - public $info_injector = array(); |
|
| 83 | - |
|
| 84 | - /** |
|
| 85 | - * Doctype object |
|
| 86 | - */ |
|
| 87 | - public $doctype; |
|
| 88 | - |
|
| 89 | - |
|
| 90 | - |
|
| 91 | - // RAW CUSTOMIZATION STUFF -------------------------------------------- |
|
| 92 | - |
|
| 93 | - /** |
|
| 94 | - * Adds a custom attribute to a pre-existing element |
|
| 95 | - * @note This is strictly convenience, and does not have a corresponding |
|
| 96 | - * method in HTMLPurifier_HTMLModule |
|
| 97 | - * @param $element_name String element name to add attribute to |
|
| 98 | - * @param $attr_name String name of attribute |
|
| 99 | - * @param $def Attribute definition, can be string or object, see |
|
| 100 | - * HTMLPurifier_AttrTypes for details |
|
| 101 | - */ |
|
| 102 | - public function addAttribute($element_name, $attr_name, $def) { |
|
| 103 | - $module = $this->getAnonymousModule(); |
|
| 104 | - if (!isset($module->info[$element_name])) { |
|
| 105 | - $element = $module->addBlankElement($element_name); |
|
| 106 | - } else { |
|
| 107 | - $element = $module->info[$element_name]; |
|
| 108 | - } |
|
| 109 | - $element->attr[$attr_name] = $def; |
|
| 110 | - } |
|
| 111 | - |
|
| 112 | - /** |
|
| 113 | - * Adds a custom element to your HTML definition |
|
| 114 | - * @note See HTMLPurifier_HTMLModule::addElement for detailed |
|
| 115 | - * parameter and return value descriptions. |
|
| 116 | - */ |
|
| 117 | - public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) { |
|
| 118 | - $module = $this->getAnonymousModule(); |
|
| 119 | - // assume that if the user is calling this, the element |
|
| 120 | - // is safe. This may not be a good idea |
|
| 121 | - $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes); |
|
| 122 | - return $element; |
|
| 123 | - } |
|
| 124 | - |
|
| 125 | - /** |
|
| 126 | - * Adds a blank element to your HTML definition, for overriding |
|
| 127 | - * existing behavior |
|
| 128 | - * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed |
|
| 129 | - * parameter and return value descriptions. |
|
| 130 | - */ |
|
| 131 | - public function addBlankElement($element_name) { |
|
| 132 | - $module = $this->getAnonymousModule(); |
|
| 133 | - $element = $module->addBlankElement($element_name); |
|
| 134 | - return $element; |
|
| 135 | - } |
|
| 136 | - |
|
| 137 | - /** |
|
| 138 | - * Retrieves a reference to the anonymous module, so you can |
|
| 139 | - * bust out advanced features without having to make your own |
|
| 140 | - * module. |
|
| 141 | - */ |
|
| 142 | - public function getAnonymousModule() { |
|
| 143 | - if (!$this->_anonModule) { |
|
| 144 | - $this->_anonModule = new HTMLPurifier_HTMLModule(); |
|
| 145 | - $this->_anonModule->name = 'Anonymous'; |
|
| 146 | - } |
|
| 147 | - return $this->_anonModule; |
|
| 148 | - } |
|
| 149 | - |
|
| 150 | - private $_anonModule = null; |
|
| 151 | - |
|
| 152 | - |
|
| 153 | - // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- |
|
| 154 | - |
|
| 155 | - public $type = 'HTML'; |
|
| 156 | - public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ |
|
| 157 | - |
|
| 158 | - /** |
|
| 159 | - * Performs low-cost, preliminary initialization. |
|
| 160 | - */ |
|
| 161 | - public function __construct() { |
|
| 162 | - $this->manager = new HTMLPurifier_HTMLModuleManager(); |
|
| 163 | - } |
|
| 164 | - |
|
| 165 | - protected function doSetup($config) { |
|
| 166 | - $this->processModules($config); |
|
| 167 | - $this->setupConfigStuff($config); |
|
| 168 | - unset($this->manager); |
|
| 169 | - |
|
| 170 | - // cleanup some of the element definitions |
|
| 171 | - foreach ($this->info as $k => $v) { |
|
| 172 | - unset($this->info[$k]->content_model); |
|
| 173 | - unset($this->info[$k]->content_model_type); |
|
| 174 | - } |
|
| 175 | - } |
|
| 176 | - |
|
| 177 | - /** |
|
| 178 | - * Extract out the information from the manager |
|
| 179 | - */ |
|
| 180 | - protected function processModules($config) { |
|
| 181 | - |
|
| 182 | - if ($this->_anonModule) { |
|
| 183 | - // for user specific changes |
|
| 184 | - // this is late-loaded so we don't have to deal with PHP4 |
|
| 185 | - // reference wonky-ness |
|
| 186 | - $this->manager->addModule($this->_anonModule); |
|
| 187 | - unset($this->_anonModule); |
|
| 188 | - } |
|
| 189 | - |
|
| 190 | - $this->manager->setup($config); |
|
| 191 | - $this->doctype = $this->manager->doctype; |
|
| 192 | - |
|
| 193 | - foreach ($this->manager->modules as $module) { |
|
| 194 | - foreach($module->info_tag_transform as $k => $v) { |
|
| 195 | - if ($v === false) unset($this->info_tag_transform[$k]); |
|
| 196 | - else $this->info_tag_transform[$k] = $v; |
|
| 197 | - } |
|
| 198 | - foreach($module->info_attr_transform_pre as $k => $v) { |
|
| 199 | - if ($v === false) unset($this->info_attr_transform_pre[$k]); |
|
| 200 | - else $this->info_attr_transform_pre[$k] = $v; |
|
| 201 | - } |
|
| 202 | - foreach($module->info_attr_transform_post as $k => $v) { |
|
| 203 | - if ($v === false) unset($this->info_attr_transform_post[$k]); |
|
| 204 | - else $this->info_attr_transform_post[$k] = $v; |
|
| 205 | - } |
|
| 206 | - foreach ($module->info_injector as $k => $v) { |
|
| 207 | - if ($v === false) unset($this->info_injector[$k]); |
|
| 208 | - else $this->info_injector[$k] = $v; |
|
| 209 | - } |
|
| 210 | - } |
|
| 211 | - |
|
| 212 | - $this->info = $this->manager->getElements(); |
|
| 213 | - $this->info_content_sets = $this->manager->contentSets->lookup; |
|
| 214 | - |
|
| 215 | - } |
|
| 216 | - |
|
| 217 | - /** |
|
| 218 | - * Sets up stuff based on config. We need a better way of doing this. |
|
| 219 | - */ |
|
| 220 | - protected function setupConfigStuff($config) { |
|
| 221 | - |
|
| 222 | - $block_wrapper = $config->get('HTML.BlockWrapper'); |
|
| 223 | - if (isset($this->info_content_sets['Block'][$block_wrapper])) { |
|
| 224 | - $this->info_block_wrapper = $block_wrapper; |
|
| 225 | - } else { |
|
| 226 | - trigger_error('Cannot use non-block element as block wrapper', |
|
| 227 | - E_USER_ERROR); |
|
| 228 | - } |
|
| 229 | - |
|
| 230 | - $parent = $config->get('HTML.Parent'); |
|
| 231 | - $def = $this->manager->getElement($parent, true); |
|
| 232 | - if ($def) { |
|
| 233 | - $this->info_parent = $parent; |
|
| 234 | - $this->info_parent_def = $def; |
|
| 235 | - } else { |
|
| 236 | - trigger_error('Cannot use unrecognized element as parent', |
|
| 237 | - E_USER_ERROR); |
|
| 238 | - $this->info_parent_def = $this->manager->getElement($this->info_parent, true); |
|
| 239 | - } |
|
| 240 | - |
|
| 241 | - // support template text |
|
| 242 | - $support = "(for information on implementing this, see the ". |
|
| 243 | - "support forums) "; |
|
| 244 | - |
|
| 245 | - // setup allowed elements ----------------------------------------- |
|
| 246 | - |
|
| 247 | - $allowed_elements = $config->get('HTML.AllowedElements'); |
|
| 248 | - $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early |
|
| 249 | - |
|
| 250 | - if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { |
|
| 251 | - $allowed = $config->get('HTML.Allowed'); |
|
| 252 | - if (is_string($allowed)) { |
|
| 253 | - list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); |
|
| 254 | - } |
|
| 255 | - } |
|
| 256 | - |
|
| 257 | - if (is_array($allowed_elements)) { |
|
| 258 | - foreach ($this->info as $name => $d) { |
|
| 259 | - if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
| 260 | - unset($allowed_elements[$name]); |
|
| 261 | - } |
|
| 262 | - // emit errors |
|
| 263 | - foreach ($allowed_elements as $element => $d) { |
|
| 264 | - $element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful! |
|
| 265 | - trigger_error("Element '$element' is not supported $support", E_USER_WARNING); |
|
| 266 | - } |
|
| 267 | - } |
|
| 268 | - |
|
| 269 | - // setup allowed attributes --------------------------------------- |
|
| 270 | - |
|
| 271 | - $allowed_attributes_mutable = $allowed_attributes; // by copy! |
|
| 272 | - if (is_array($allowed_attributes)) { |
|
| 273 | - |
|
| 274 | - // This actually doesn't do anything, since we went away from |
|
| 275 | - // global attributes. It's possible that userland code uses |
|
| 276 | - // it, but HTMLModuleManager doesn't! |
|
| 277 | - foreach ($this->info_global_attr as $attr => $x) { |
|
| 278 | - $keys = array($attr, "*@$attr", "*.$attr"); |
|
| 279 | - $delete = true; |
|
| 280 | - foreach ($keys as $key) { |
|
| 281 | - if ($delete && isset($allowed_attributes[$key])) { |
|
| 282 | - $delete = false; |
|
| 283 | - } |
|
| 284 | - if (isset($allowed_attributes_mutable[$key])) { |
|
| 285 | - unset($allowed_attributes_mutable[$key]); |
|
| 286 | - } |
|
| 287 | - } |
|
| 288 | - if ($delete) unset($this->info_global_attr[$attr]); |
|
| 289 | - } |
|
| 290 | - |
|
| 291 | - foreach ($this->info as $tag => $info) { |
|
| 292 | - foreach ($info->attr as $attr => $x) { |
|
| 293 | - $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); |
|
| 294 | - $delete = true; |
|
| 295 | - foreach ($keys as $key) { |
|
| 296 | - if ($delete && isset($allowed_attributes[$key])) { |
|
| 297 | - $delete = false; |
|
| 298 | - } |
|
| 299 | - if (isset($allowed_attributes_mutable[$key])) { |
|
| 300 | - unset($allowed_attributes_mutable[$key]); |
|
| 301 | - } |
|
| 302 | - } |
|
| 303 | - if ($delete) { |
|
| 304 | - if ($this->info[$tag]->attr[$attr]->required) { |
|
| 305 | - trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING); |
|
| 306 | - } |
|
| 307 | - unset($this->info[$tag]->attr[$attr]); |
|
| 308 | - } |
|
| 309 | - } |
|
| 310 | - } |
|
| 311 | - // emit errors |
|
| 312 | - foreach ($allowed_attributes_mutable as $elattr => $d) { |
|
| 313 | - $bits = preg_split('/[.@]/', $elattr, 2); |
|
| 314 | - $c = count($bits); |
|
| 315 | - switch ($c) { |
|
| 316 | - case 2: |
|
| 317 | - if ($bits[0] !== '*') { |
|
| 318 | - $element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
| 319 | - $attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
| 320 | - if (!isset($this->info[$element])) { |
|
| 321 | - trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support"); |
|
| 322 | - } else { |
|
| 323 | - trigger_error("Attribute '$attribute' in element '$element' not supported $support", |
|
| 324 | - E_USER_WARNING); |
|
| 325 | - } |
|
| 326 | - break; |
|
| 327 | - } |
|
| 328 | - // otherwise fall through |
|
| 329 | - case 1: |
|
| 330 | - $attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
| 331 | - trigger_error("Global attribute '$attribute' is not ". |
|
| 332 | - "supported in any elements $support", |
|
| 333 | - E_USER_WARNING); |
|
| 334 | - break; |
|
| 335 | - } |
|
| 336 | - } |
|
| 337 | - |
|
| 338 | - } |
|
| 339 | - |
|
| 340 | - // setup forbidden elements --------------------------------------- |
|
| 341 | - |
|
| 342 | - $forbidden_elements = $config->get('HTML.ForbiddenElements'); |
|
| 343 | - $forbidden_attributes = $config->get('HTML.ForbiddenAttributes'); |
|
| 344 | - |
|
| 345 | - foreach ($this->info as $tag => $info) { |
|
| 346 | - if (isset($forbidden_elements[$tag])) { |
|
| 347 | - unset($this->info[$tag]); |
|
| 348 | - continue; |
|
| 349 | - } |
|
| 350 | - foreach ($info->attr as $attr => $x) { |
|
| 351 | - if ( |
|
| 352 | - isset($forbidden_attributes["$tag@$attr"]) || |
|
| 353 | - isset($forbidden_attributes["*@$attr"]) || |
|
| 354 | - isset($forbidden_attributes[$attr]) |
|
| 355 | - ) { |
|
| 356 | - unset($this->info[$tag]->attr[$attr]); |
|
| 357 | - continue; |
|
| 358 | - } // this segment might get removed eventually |
|
| 359 | - elseif (isset($forbidden_attributes["$tag.$attr"])) { |
|
| 360 | - // $tag.$attr are not user supplied, so no worries! |
|
| 361 | - trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING); |
|
| 362 | - } |
|
| 363 | - } |
|
| 364 | - } |
|
| 365 | - foreach ($forbidden_attributes as $key => $v) { |
|
| 366 | - if (strlen($key) < 2) continue; |
|
| 367 | - if ($key[0] != '*') continue; |
|
| 368 | - if ($key[1] == '.') { |
|
| 369 | - trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); |
|
| 370 | - } |
|
| 371 | - } |
|
| 372 | - |
|
| 373 | - // setup injectors ----------------------------------------------------- |
|
| 374 | - foreach ($this->info_injector as $i => $injector) { |
|
| 375 | - if ($injector->checkNeeded($config) !== false) { |
|
| 376 | - // remove injector that does not have it's required |
|
| 377 | - // elements/attributes present, and is thus not needed. |
|
| 378 | - unset($this->info_injector[$i]); |
|
| 379 | - } |
|
| 380 | - } |
|
| 381 | - } |
|
| 382 | - |
|
| 383 | - /** |
|
| 384 | - * Parses a TinyMCE-flavored Allowed Elements and Attributes list into |
|
| 385 | - * separate lists for processing. Format is element[attr1|attr2],element2... |
|
| 386 | - * @warning Although it's largely drawn from TinyMCE's implementation, |
|
| 387 | - * it is different, and you'll probably have to modify your lists |
|
| 388 | - * @param $list String list to parse |
|
| 389 | - * @param array($allowed_elements, $allowed_attributes) |
|
| 390 | - * @todo Give this its own class, probably static interface |
|
| 391 | - */ |
|
| 392 | - public function parseTinyMCEAllowedList($list) { |
|
| 393 | - |
|
| 394 | - $list = str_replace(array(' ', "\t"), '', $list); |
|
| 395 | - |
|
| 396 | - $elements = array(); |
|
| 397 | - $attributes = array(); |
|
| 398 | - |
|
| 399 | - $chunks = preg_split('/(,|[\n\r]+)/', $list); |
|
| 400 | - foreach ($chunks as $chunk) { |
|
| 401 | - if (empty($chunk)) continue; |
|
| 402 | - // remove TinyMCE element control characters |
|
| 403 | - if (!strpos($chunk, '[')) { |
|
| 404 | - $element = $chunk; |
|
| 405 | - $attr = false; |
|
| 406 | - } else { |
|
| 407 | - list($element, $attr) = explode('[', $chunk); |
|
| 408 | - } |
|
| 409 | - if ($element !== '*') $elements[$element] = true; |
|
| 410 | - if (!$attr) continue; |
|
| 411 | - $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] |
|
| 412 | - $attr = explode('|', $attr); |
|
| 413 | - foreach ($attr as $key) { |
|
| 414 | - $attributes["$element.$key"] = true; |
|
| 415 | - } |
|
| 416 | - } |
|
| 417 | - |
|
| 418 | - return array($elements, $attributes); |
|
| 419 | - |
|
| 420 | - } |
|
| 29 | + // FULLY-PUBLIC VARIABLES --------------------------------------------- |
|
| 30 | + |
|
| 31 | + /** |
|
| 32 | + * Associative array of element names to HTMLPurifier_ElementDef |
|
| 33 | + */ |
|
| 34 | + public $info = array(); |
|
| 35 | + |
|
| 36 | + /** |
|
| 37 | + * Associative array of global attribute name to attribute definition. |
|
| 38 | + */ |
|
| 39 | + public $info_global_attr = array(); |
|
| 40 | + |
|
| 41 | + /** |
|
| 42 | + * String name of parent element HTML will be going into. |
|
| 43 | + */ |
|
| 44 | + public $info_parent = 'div'; |
|
| 45 | + |
|
| 46 | + /** |
|
| 47 | + * Definition for parent element, allows parent element to be a |
|
| 48 | + * tag that's not allowed inside the HTML fragment. |
|
| 49 | + */ |
|
| 50 | + public $info_parent_def; |
|
| 51 | + |
|
| 52 | + /** |
|
| 53 | + * String name of element used to wrap inline elements in block context |
|
| 54 | + * @note This is rarely used except for BLOCKQUOTEs in strict mode |
|
| 55 | + */ |
|
| 56 | + public $info_block_wrapper = 'p'; |
|
| 57 | + |
|
| 58 | + /** |
|
| 59 | + * Associative array of deprecated tag name to HTMLPurifier_TagTransform |
|
| 60 | + */ |
|
| 61 | + public $info_tag_transform = array(); |
|
| 62 | + |
|
| 63 | + /** |
|
| 64 | + * Indexed list of HTMLPurifier_AttrTransform to be performed before validation. |
|
| 65 | + */ |
|
| 66 | + public $info_attr_transform_pre = array(); |
|
| 67 | + |
|
| 68 | + /** |
|
| 69 | + * Indexed list of HTMLPurifier_AttrTransform to be performed after validation. |
|
| 70 | + */ |
|
| 71 | + public $info_attr_transform_post = array(); |
|
| 72 | + |
|
| 73 | + /** |
|
| 74 | + * Nested lookup array of content set name (Block, Inline) to |
|
| 75 | + * element name to whether or not it belongs in that content set. |
|
| 76 | + */ |
|
| 77 | + public $info_content_sets = array(); |
|
| 78 | + |
|
| 79 | + /** |
|
| 80 | + * Indexed list of HTMLPurifier_Injector to be used. |
|
| 81 | + */ |
|
| 82 | + public $info_injector = array(); |
|
| 83 | + |
|
| 84 | + /** |
|
| 85 | + * Doctype object |
|
| 86 | + */ |
|
| 87 | + public $doctype; |
|
| 88 | + |
|
| 89 | + |
|
| 90 | + |
|
| 91 | + // RAW CUSTOMIZATION STUFF -------------------------------------------- |
|
| 92 | + |
|
| 93 | + /** |
|
| 94 | + * Adds a custom attribute to a pre-existing element |
|
| 95 | + * @note This is strictly convenience, and does not have a corresponding |
|
| 96 | + * method in HTMLPurifier_HTMLModule |
|
| 97 | + * @param $element_name String element name to add attribute to |
|
| 98 | + * @param $attr_name String name of attribute |
|
| 99 | + * @param $def Attribute definition, can be string or object, see |
|
| 100 | + * HTMLPurifier_AttrTypes for details |
|
| 101 | + */ |
|
| 102 | + public function addAttribute($element_name, $attr_name, $def) { |
|
| 103 | + $module = $this->getAnonymousModule(); |
|
| 104 | + if (!isset($module->info[$element_name])) { |
|
| 105 | + $element = $module->addBlankElement($element_name); |
|
| 106 | + } else { |
|
| 107 | + $element = $module->info[$element_name]; |
|
| 108 | + } |
|
| 109 | + $element->attr[$attr_name] = $def; |
|
| 110 | + } |
|
| 111 | + |
|
| 112 | + /** |
|
| 113 | + * Adds a custom element to your HTML definition |
|
| 114 | + * @note See HTMLPurifier_HTMLModule::addElement for detailed |
|
| 115 | + * parameter and return value descriptions. |
|
| 116 | + */ |
|
| 117 | + public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) { |
|
| 118 | + $module = $this->getAnonymousModule(); |
|
| 119 | + // assume that if the user is calling this, the element |
|
| 120 | + // is safe. This may not be a good idea |
|
| 121 | + $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes); |
|
| 122 | + return $element; |
|
| 123 | + } |
|
| 124 | + |
|
| 125 | + /** |
|
| 126 | + * Adds a blank element to your HTML definition, for overriding |
|
| 127 | + * existing behavior |
|
| 128 | + * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed |
|
| 129 | + * parameter and return value descriptions. |
|
| 130 | + */ |
|
| 131 | + public function addBlankElement($element_name) { |
|
| 132 | + $module = $this->getAnonymousModule(); |
|
| 133 | + $element = $module->addBlankElement($element_name); |
|
| 134 | + return $element; |
|
| 135 | + } |
|
| 136 | + |
|
| 137 | + /** |
|
| 138 | + * Retrieves a reference to the anonymous module, so you can |
|
| 139 | + * bust out advanced features without having to make your own |
|
| 140 | + * module. |
|
| 141 | + */ |
|
| 142 | + public function getAnonymousModule() { |
|
| 143 | + if (!$this->_anonModule) { |
|
| 144 | + $this->_anonModule = new HTMLPurifier_HTMLModule(); |
|
| 145 | + $this->_anonModule->name = 'Anonymous'; |
|
| 146 | + } |
|
| 147 | + return $this->_anonModule; |
|
| 148 | + } |
|
| 149 | + |
|
| 150 | + private $_anonModule = null; |
|
| 151 | + |
|
| 152 | + |
|
| 153 | + // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- |
|
| 154 | + |
|
| 155 | + public $type = 'HTML'; |
|
| 156 | + public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ |
|
| 157 | + |
|
| 158 | + /** |
|
| 159 | + * Performs low-cost, preliminary initialization. |
|
| 160 | + */ |
|
| 161 | + public function __construct() { |
|
| 162 | + $this->manager = new HTMLPurifier_HTMLModuleManager(); |
|
| 163 | + } |
|
| 164 | + |
|
| 165 | + protected function doSetup($config) { |
|
| 166 | + $this->processModules($config); |
|
| 167 | + $this->setupConfigStuff($config); |
|
| 168 | + unset($this->manager); |
|
| 169 | + |
|
| 170 | + // cleanup some of the element definitions |
|
| 171 | + foreach ($this->info as $k => $v) { |
|
| 172 | + unset($this->info[$k]->content_model); |
|
| 173 | + unset($this->info[$k]->content_model_type); |
|
| 174 | + } |
|
| 175 | + } |
|
| 176 | + |
|
| 177 | + /** |
|
| 178 | + * Extract out the information from the manager |
|
| 179 | + */ |
|
| 180 | + protected function processModules($config) { |
|
| 181 | + |
|
| 182 | + if ($this->_anonModule) { |
|
| 183 | + // for user specific changes |
|
| 184 | + // this is late-loaded so we don't have to deal with PHP4 |
|
| 185 | + // reference wonky-ness |
|
| 186 | + $this->manager->addModule($this->_anonModule); |
|
| 187 | + unset($this->_anonModule); |
|
| 188 | + } |
|
| 189 | + |
|
| 190 | + $this->manager->setup($config); |
|
| 191 | + $this->doctype = $this->manager->doctype; |
|
| 192 | + |
|
| 193 | + foreach ($this->manager->modules as $module) { |
|
| 194 | + foreach($module->info_tag_transform as $k => $v) { |
|
| 195 | + if ($v === false) unset($this->info_tag_transform[$k]); |
|
| 196 | + else $this->info_tag_transform[$k] = $v; |
|
| 197 | + } |
|
| 198 | + foreach($module->info_attr_transform_pre as $k => $v) { |
|
| 199 | + if ($v === false) unset($this->info_attr_transform_pre[$k]); |
|
| 200 | + else $this->info_attr_transform_pre[$k] = $v; |
|
| 201 | + } |
|
| 202 | + foreach($module->info_attr_transform_post as $k => $v) { |
|
| 203 | + if ($v === false) unset($this->info_attr_transform_post[$k]); |
|
| 204 | + else $this->info_attr_transform_post[$k] = $v; |
|
| 205 | + } |
|
| 206 | + foreach ($module->info_injector as $k => $v) { |
|
| 207 | + if ($v === false) unset($this->info_injector[$k]); |
|
| 208 | + else $this->info_injector[$k] = $v; |
|
| 209 | + } |
|
| 210 | + } |
|
| 211 | + |
|
| 212 | + $this->info = $this->manager->getElements(); |
|
| 213 | + $this->info_content_sets = $this->manager->contentSets->lookup; |
|
| 214 | + |
|
| 215 | + } |
|
| 216 | + |
|
| 217 | + /** |
|
| 218 | + * Sets up stuff based on config. We need a better way of doing this. |
|
| 219 | + */ |
|
| 220 | + protected function setupConfigStuff($config) { |
|
| 221 | + |
|
| 222 | + $block_wrapper = $config->get('HTML.BlockWrapper'); |
|
| 223 | + if (isset($this->info_content_sets['Block'][$block_wrapper])) { |
|
| 224 | + $this->info_block_wrapper = $block_wrapper; |
|
| 225 | + } else { |
|
| 226 | + trigger_error('Cannot use non-block element as block wrapper', |
|
| 227 | + E_USER_ERROR); |
|
| 228 | + } |
|
| 229 | + |
|
| 230 | + $parent = $config->get('HTML.Parent'); |
|
| 231 | + $def = $this->manager->getElement($parent, true); |
|
| 232 | + if ($def) { |
|
| 233 | + $this->info_parent = $parent; |
|
| 234 | + $this->info_parent_def = $def; |
|
| 235 | + } else { |
|
| 236 | + trigger_error('Cannot use unrecognized element as parent', |
|
| 237 | + E_USER_ERROR); |
|
| 238 | + $this->info_parent_def = $this->manager->getElement($this->info_parent, true); |
|
| 239 | + } |
|
| 240 | + |
|
| 241 | + // support template text |
|
| 242 | + $support = "(for information on implementing this, see the ". |
|
| 243 | + "support forums) "; |
|
| 244 | + |
|
| 245 | + // setup allowed elements ----------------------------------------- |
|
| 246 | + |
|
| 247 | + $allowed_elements = $config->get('HTML.AllowedElements'); |
|
| 248 | + $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early |
|
| 249 | + |
|
| 250 | + if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { |
|
| 251 | + $allowed = $config->get('HTML.Allowed'); |
|
| 252 | + if (is_string($allowed)) { |
|
| 253 | + list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); |
|
| 254 | + } |
|
| 255 | + } |
|
| 256 | + |
|
| 257 | + if (is_array($allowed_elements)) { |
|
| 258 | + foreach ($this->info as $name => $d) { |
|
| 259 | + if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
| 260 | + unset($allowed_elements[$name]); |
|
| 261 | + } |
|
| 262 | + // emit errors |
|
| 263 | + foreach ($allowed_elements as $element => $d) { |
|
| 264 | + $element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful! |
|
| 265 | + trigger_error("Element '$element' is not supported $support", E_USER_WARNING); |
|
| 266 | + } |
|
| 267 | + } |
|
| 268 | + |
|
| 269 | + // setup allowed attributes --------------------------------------- |
|
| 270 | + |
|
| 271 | + $allowed_attributes_mutable = $allowed_attributes; // by copy! |
|
| 272 | + if (is_array($allowed_attributes)) { |
|
| 273 | + |
|
| 274 | + // This actually doesn't do anything, since we went away from |
|
| 275 | + // global attributes. It's possible that userland code uses |
|
| 276 | + // it, but HTMLModuleManager doesn't! |
|
| 277 | + foreach ($this->info_global_attr as $attr => $x) { |
|
| 278 | + $keys = array($attr, "*@$attr", "*.$attr"); |
|
| 279 | + $delete = true; |
|
| 280 | + foreach ($keys as $key) { |
|
| 281 | + if ($delete && isset($allowed_attributes[$key])) { |
|
| 282 | + $delete = false; |
|
| 283 | + } |
|
| 284 | + if (isset($allowed_attributes_mutable[$key])) { |
|
| 285 | + unset($allowed_attributes_mutable[$key]); |
|
| 286 | + } |
|
| 287 | + } |
|
| 288 | + if ($delete) unset($this->info_global_attr[$attr]); |
|
| 289 | + } |
|
| 290 | + |
|
| 291 | + foreach ($this->info as $tag => $info) { |
|
| 292 | + foreach ($info->attr as $attr => $x) { |
|
| 293 | + $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); |
|
| 294 | + $delete = true; |
|
| 295 | + foreach ($keys as $key) { |
|
| 296 | + if ($delete && isset($allowed_attributes[$key])) { |
|
| 297 | + $delete = false; |
|
| 298 | + } |
|
| 299 | + if (isset($allowed_attributes_mutable[$key])) { |
|
| 300 | + unset($allowed_attributes_mutable[$key]); |
|
| 301 | + } |
|
| 302 | + } |
|
| 303 | + if ($delete) { |
|
| 304 | + if ($this->info[$tag]->attr[$attr]->required) { |
|
| 305 | + trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING); |
|
| 306 | + } |
|
| 307 | + unset($this->info[$tag]->attr[$attr]); |
|
| 308 | + } |
|
| 309 | + } |
|
| 310 | + } |
|
| 311 | + // emit errors |
|
| 312 | + foreach ($allowed_attributes_mutable as $elattr => $d) { |
|
| 313 | + $bits = preg_split('/[.@]/', $elattr, 2); |
|
| 314 | + $c = count($bits); |
|
| 315 | + switch ($c) { |
|
| 316 | + case 2: |
|
| 317 | + if ($bits[0] !== '*') { |
|
| 318 | + $element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
| 319 | + $attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
| 320 | + if (!isset($this->info[$element])) { |
|
| 321 | + trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support"); |
|
| 322 | + } else { |
|
| 323 | + trigger_error("Attribute '$attribute' in element '$element' not supported $support", |
|
| 324 | + E_USER_WARNING); |
|
| 325 | + } |
|
| 326 | + break; |
|
| 327 | + } |
|
| 328 | + // otherwise fall through |
|
| 329 | + case 1: |
|
| 330 | + $attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
| 331 | + trigger_error("Global attribute '$attribute' is not ". |
|
| 332 | + "supported in any elements $support", |
|
| 333 | + E_USER_WARNING); |
|
| 334 | + break; |
|
| 335 | + } |
|
| 336 | + } |
|
| 337 | + |
|
| 338 | + } |
|
| 339 | + |
|
| 340 | + // setup forbidden elements --------------------------------------- |
|
| 341 | + |
|
| 342 | + $forbidden_elements = $config->get('HTML.ForbiddenElements'); |
|
| 343 | + $forbidden_attributes = $config->get('HTML.ForbiddenAttributes'); |
|
| 344 | + |
|
| 345 | + foreach ($this->info as $tag => $info) { |
|
| 346 | + if (isset($forbidden_elements[$tag])) { |
|
| 347 | + unset($this->info[$tag]); |
|
| 348 | + continue; |
|
| 349 | + } |
|
| 350 | + foreach ($info->attr as $attr => $x) { |
|
| 351 | + if ( |
|
| 352 | + isset($forbidden_attributes["$tag@$attr"]) || |
|
| 353 | + isset($forbidden_attributes["*@$attr"]) || |
|
| 354 | + isset($forbidden_attributes[$attr]) |
|
| 355 | + ) { |
|
| 356 | + unset($this->info[$tag]->attr[$attr]); |
|
| 357 | + continue; |
|
| 358 | + } // this segment might get removed eventually |
|
| 359 | + elseif (isset($forbidden_attributes["$tag.$attr"])) { |
|
| 360 | + // $tag.$attr are not user supplied, so no worries! |
|
| 361 | + trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING); |
|
| 362 | + } |
|
| 363 | + } |
|
| 364 | + } |
|
| 365 | + foreach ($forbidden_attributes as $key => $v) { |
|
| 366 | + if (strlen($key) < 2) continue; |
|
| 367 | + if ($key[0] != '*') continue; |
|
| 368 | + if ($key[1] == '.') { |
|
| 369 | + trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); |
|
| 370 | + } |
|
| 371 | + } |
|
| 372 | + |
|
| 373 | + // setup injectors ----------------------------------------------------- |
|
| 374 | + foreach ($this->info_injector as $i => $injector) { |
|
| 375 | + if ($injector->checkNeeded($config) !== false) { |
|
| 376 | + // remove injector that does not have its required |
|
| 377 | + // elements/attributes present, and is thus not needed. |
|
| 378 | + unset($this->info_injector[$i]); |
|
| 379 | + } |
|
| 380 | + } |
|
| 381 | + } |
|
| 382 | + |
|
| 383 | + /** |
|
| 384 | + * Parses a TinyMCE-flavored Allowed Elements and Attributes list into |
|
| 385 | + * separate lists for processing. Format is element[attr1|attr2],element2... |
|
| 386 | + * @warning Although it's largely drawn from TinyMCE's implementation, |
|
| 387 | + * it is different, and you'll probably have to modify your lists |
|
| 388 | + * @param string $list String list to parse |
|
| 389 | + * @return array($allowed_elements, $allowed_attributes) |
|
| 390 | + * @todo Give this its own class, probably static interface |
|
| 391 | + */ |
|
| 392 | + public function parseTinyMCEAllowedList($list) { |
|
| 393 | + |
|
| 394 | + $list = str_replace(array(' ', "\t"), '', $list); |
|
| 395 | + |
|
| 396 | + $elements = array(); |
|
| 397 | + $attributes = array(); |
|
| 398 | + |
|
| 399 | + $chunks = preg_split('/(,|[\n\r]+)/', $list); |
|
| 400 | + foreach ($chunks as $chunk) { |
|
| 401 | + if (empty($chunk)) continue; |
|
| 402 | + // remove TinyMCE element control characters |
|
| 403 | + if (!strpos($chunk, '[')) { |
|
| 404 | + $element = $chunk; |
|
| 405 | + $attr = false; |
|
| 406 | + } else { |
|
| 407 | + list($element, $attr) = explode('[', $chunk); |
|
| 408 | + } |
|
| 409 | + if ($element !== '*') $elements[$element] = true; |
|
| 410 | + if (!$attr) continue; |
|
| 411 | + $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] |
|
| 412 | + $attr = explode('|', $attr); |
|
| 413 | + foreach ($attr as $key) { |
|
| 414 | + $attributes["$element.$key"] = true; |
|
| 415 | + } |
|
| 416 | + } |
|
| 417 | + |
|
| 418 | + return array($elements, $attributes); |
|
| 419 | + |
|
| 420 | + } |
|
| 421 | 421 | |
| 422 | 422 | |
| 423 | 423 | } |
@@ -192,20 +192,32 @@ discard block |
||
| 192 | 192 | |
| 193 | 193 | foreach ($this->manager->modules as $module) { |
| 194 | 194 | foreach($module->info_tag_transform as $k => $v) { |
| 195 | - if ($v === false) unset($this->info_tag_transform[$k]); |
|
| 196 | - else $this->info_tag_transform[$k] = $v; |
|
| 195 | + if ($v === false) { |
|
| 196 | + unset($this->info_tag_transform[$k]); |
|
| 197 | + } else { |
|
| 198 | + $this->info_tag_transform[$k] = $v; |
|
| 199 | + } |
|
| 197 | 200 | } |
| 198 | 201 | foreach($module->info_attr_transform_pre as $k => $v) { |
| 199 | - if ($v === false) unset($this->info_attr_transform_pre[$k]); |
|
| 200 | - else $this->info_attr_transform_pre[$k] = $v; |
|
| 202 | + if ($v === false) { |
|
| 203 | + unset($this->info_attr_transform_pre[$k]); |
|
| 204 | + } else { |
|
| 205 | + $this->info_attr_transform_pre[$k] = $v; |
|
| 206 | + } |
|
| 201 | 207 | } |
| 202 | 208 | foreach($module->info_attr_transform_post as $k => $v) { |
| 203 | - if ($v === false) unset($this->info_attr_transform_post[$k]); |
|
| 204 | - else $this->info_attr_transform_post[$k] = $v; |
|
| 209 | + if ($v === false) { |
|
| 210 | + unset($this->info_attr_transform_post[$k]); |
|
| 211 | + } else { |
|
| 212 | + $this->info_attr_transform_post[$k] = $v; |
|
| 213 | + } |
|
| 205 | 214 | } |
| 206 | 215 | foreach ($module->info_injector as $k => $v) { |
| 207 | - if ($v === false) unset($this->info_injector[$k]); |
|
| 208 | - else $this->info_injector[$k] = $v; |
|
| 216 | + if ($v === false) { |
|
| 217 | + unset($this->info_injector[$k]); |
|
| 218 | + } else { |
|
| 219 | + $this->info_injector[$k] = $v; |
|
| 220 | + } |
|
| 209 | 221 | } |
| 210 | 222 | } |
| 211 | 223 | |
@@ -256,7 +268,9 @@ discard block |
||
| 256 | 268 | |
| 257 | 269 | if (is_array($allowed_elements)) { |
| 258 | 270 | foreach ($this->info as $name => $d) { |
| 259 | - if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
| 271 | + if(!isset($allowed_elements[$name])) { |
|
| 272 | + unset($this->info[$name]); |
|
| 273 | + } |
|
| 260 | 274 | unset($allowed_elements[$name]); |
| 261 | 275 | } |
| 262 | 276 | // emit errors |
@@ -285,7 +299,9 @@ discard block |
||
| 285 | 299 | unset($allowed_attributes_mutable[$key]); |
| 286 | 300 | } |
| 287 | 301 | } |
| 288 | - if ($delete) unset($this->info_global_attr[$attr]); |
|
| 302 | + if ($delete) { |
|
| 303 | + unset($this->info_global_attr[$attr]); |
|
| 304 | + } |
|
| 289 | 305 | } |
| 290 | 306 | |
| 291 | 307 | foreach ($this->info as $tag => $info) { |
@@ -363,8 +379,12 @@ discard block |
||
| 363 | 379 | } |
| 364 | 380 | } |
| 365 | 381 | foreach ($forbidden_attributes as $key => $v) { |
| 366 | - if (strlen($key) < 2) continue; |
|
| 367 | - if ($key[0] != '*') continue; |
|
| 382 | + if (strlen($key) < 2) { |
|
| 383 | + continue; |
|
| 384 | + } |
|
| 385 | + if ($key[0] != '*') { |
|
| 386 | + continue; |
|
| 387 | + } |
|
| 368 | 388 | if ($key[1] == '.') { |
| 369 | 389 | trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); |
| 370 | 390 | } |
@@ -398,7 +418,9 @@ discard block |
||
| 398 | 418 | |
| 399 | 419 | $chunks = preg_split('/(,|[\n\r]+)/', $list); |
| 400 | 420 | foreach ($chunks as $chunk) { |
| 401 | - if (empty($chunk)) continue; |
|
| 421 | + if (empty($chunk)) { |
|
| 422 | + continue; |
|
| 423 | + } |
|
| 402 | 424 | // remove TinyMCE element control characters |
| 403 | 425 | if (!strpos($chunk, '[')) { |
| 404 | 426 | $element = $chunk; |
@@ -406,8 +428,12 @@ discard block |
||
| 406 | 428 | } else { |
| 407 | 429 | list($element, $attr) = explode('[', $chunk); |
| 408 | 430 | } |
| 409 | - if ($element !== '*') $elements[$element] = true; |
|
| 410 | - if (!$attr) continue; |
|
| 431 | + if ($element !== '*') { |
|
| 432 | + $elements[$element] = true; |
|
| 433 | + } |
|
| 434 | + if (!$attr) { |
|
| 435 | + continue; |
|
| 436 | + } |
|
| 411 | 437 | $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] |
| 412 | 438 | $attr = explode('|', $attr); |
| 413 | 439 | foreach ($attr as $key) { |
@@ -191,15 +191,15 @@ discard block |
||
| 191 | 191 | $this->doctype = $this->manager->doctype; |
| 192 | 192 | |
| 193 | 193 | foreach ($this->manager->modules as $module) { |
| 194 | - foreach($module->info_tag_transform as $k => $v) { |
|
| 194 | + foreach ($module->info_tag_transform as $k => $v) { |
|
| 195 | 195 | if ($v === false) unset($this->info_tag_transform[$k]); |
| 196 | 196 | else $this->info_tag_transform[$k] = $v; |
| 197 | 197 | } |
| 198 | - foreach($module->info_attr_transform_pre as $k => $v) { |
|
| 198 | + foreach ($module->info_attr_transform_pre as $k => $v) { |
|
| 199 | 199 | if ($v === false) unset($this->info_attr_transform_pre[$k]); |
| 200 | 200 | else $this->info_attr_transform_pre[$k] = $v; |
| 201 | 201 | } |
| 202 | - foreach($module->info_attr_transform_post as $k => $v) { |
|
| 202 | + foreach ($module->info_attr_transform_post as $k => $v) { |
|
| 203 | 203 | if ($v === false) unset($this->info_attr_transform_post[$k]); |
| 204 | 204 | else $this->info_attr_transform_post[$k] = $v; |
| 205 | 205 | } |
@@ -256,7 +256,7 @@ discard block |
||
| 256 | 256 | |
| 257 | 257 | if (is_array($allowed_elements)) { |
| 258 | 258 | foreach ($this->info as $name => $d) { |
| 259 | - if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
| 259 | + if (!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
| 260 | 260 | unset($allowed_elements[$name]); |
| 261 | 261 | } |
| 262 | 262 | // emit errors |
@@ -179,6 +179,7 @@ discard block |
||
| 179 | 179 | /** |
| 180 | 180 | * Adds a module to the current doctype by first registering it, |
| 181 | 181 | * and then tacking it on to the active doctype |
| 182 | + * @param HTMLPurifier_HTMLModule $module |
|
| 182 | 183 | */ |
| 183 | 184 | public function addModule($module) { |
| 184 | 185 | $this->registerModule($module); |
@@ -325,7 +326,7 @@ discard block |
||
| 325 | 326 | /** |
| 326 | 327 | * Retrieves a single merged element definition |
| 327 | 328 | * @param $name Name of element |
| 328 | - * @param $trusted Boolean trusted overriding parameter: set to true |
|
| 329 | + * @param boolean $trusted Boolean trusted overriding parameter: set to true |
|
| 329 | 330 | * if you want the full version of an element |
| 330 | 331 | * @return Merged HTMLPurifier_ElementDef |
| 331 | 332 | * @note You may notice that modules are getting iterated over twice (once |
@@ -3,412 +3,412 @@ |
||
| 3 | 3 | class HTMLPurifier_HTMLModuleManager |
| 4 | 4 | { |
| 5 | 5 | |
| 6 | - /** |
|
| 7 | - * Instance of HTMLPurifier_DoctypeRegistry |
|
| 8 | - */ |
|
| 9 | - public $doctypes; |
|
| 10 | - |
|
| 11 | - /** |
|
| 12 | - * Instance of current doctype |
|
| 13 | - */ |
|
| 14 | - public $doctype; |
|
| 15 | - |
|
| 16 | - /** |
|
| 17 | - * Instance of HTMLPurifier_AttrTypes |
|
| 18 | - */ |
|
| 19 | - public $attrTypes; |
|
| 20 | - |
|
| 21 | - /** |
|
| 22 | - * Active instances of modules for the specified doctype are |
|
| 23 | - * indexed, by name, in this array. |
|
| 24 | - */ |
|
| 25 | - public $modules = array(); |
|
| 26 | - |
|
| 27 | - /** |
|
| 28 | - * Array of recognized HTMLPurifier_Module instances, indexed by |
|
| 29 | - * module's class name. This array is usually lazy loaded, but a |
|
| 30 | - * user can overload a module by pre-emptively registering it. |
|
| 31 | - */ |
|
| 32 | - public $registeredModules = array(); |
|
| 33 | - |
|
| 34 | - /** |
|
| 35 | - * List of extra modules that were added by the user using addModule(). |
|
| 36 | - * These get unconditionally merged into the current doctype, whatever |
|
| 37 | - * it may be. |
|
| 38 | - */ |
|
| 39 | - public $userModules = array(); |
|
| 40 | - |
|
| 41 | - /** |
|
| 42 | - * Associative array of element name to list of modules that have |
|
| 43 | - * definitions for the element; this array is dynamically filled. |
|
| 44 | - */ |
|
| 45 | - public $elementLookup = array(); |
|
| 46 | - |
|
| 47 | - /** List of prefixes we should use for registering small names */ |
|
| 48 | - public $prefixes = array('HTMLPurifier_HTMLModule_'); |
|
| 49 | - |
|
| 50 | - public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
| 51 | - public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ |
|
| 52 | - |
|
| 53 | - /** If set to true, unsafe elements and attributes will be allowed */ |
|
| 54 | - public $trusted = false; |
|
| 55 | - |
|
| 56 | - public function __construct() { |
|
| 57 | - |
|
| 58 | - // editable internal objects |
|
| 59 | - $this->attrTypes = new HTMLPurifier_AttrTypes(); |
|
| 60 | - $this->doctypes = new HTMLPurifier_DoctypeRegistry(); |
|
| 61 | - |
|
| 62 | - // setup basic modules |
|
| 63 | - $common = array( |
|
| 64 | - 'CommonAttributes', 'Text', 'Hypertext', 'List', |
|
| 65 | - 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', |
|
| 66 | - 'StyleAttribute', |
|
| 67 | - // Unsafe: |
|
| 68 | - 'Scripting', 'Object', 'Forms', |
|
| 69 | - // Sorta legacy, but present in strict: |
|
| 70 | - 'Name', |
|
| 71 | - ); |
|
| 72 | - $transitional = array('Legacy', 'Target', 'Iframe'); |
|
| 73 | - $xml = array('XMLCommonAttributes'); |
|
| 74 | - $non_xml = array('NonXMLCommonAttributes'); |
|
| 75 | - |
|
| 76 | - // setup basic doctypes |
|
| 77 | - $this->doctypes->register( |
|
| 78 | - 'HTML 4.01 Transitional', false, |
|
| 79 | - array_merge($common, $transitional, $non_xml), |
|
| 80 | - array('Tidy_Transitional', 'Tidy_Proprietary'), |
|
| 81 | - array(), |
|
| 82 | - '-//W3C//DTD HTML 4.01 Transitional//EN', |
|
| 83 | - 'http://www.w3.org/TR/html4/loose.dtd' |
|
| 84 | - ); |
|
| 85 | - |
|
| 86 | - $this->doctypes->register( |
|
| 87 | - 'HTML 4.01 Strict', false, |
|
| 88 | - array_merge($common, $non_xml), |
|
| 89 | - array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
| 90 | - array(), |
|
| 91 | - '-//W3C//DTD HTML 4.01//EN', |
|
| 92 | - 'http://www.w3.org/TR/html4/strict.dtd' |
|
| 93 | - ); |
|
| 94 | - |
|
| 95 | - $this->doctypes->register( |
|
| 96 | - 'XHTML 1.0 Transitional', true, |
|
| 97 | - array_merge($common, $transitional, $xml, $non_xml), |
|
| 98 | - array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), |
|
| 99 | - array(), |
|
| 100 | - '-//W3C//DTD XHTML 1.0 Transitional//EN', |
|
| 101 | - 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' |
|
| 102 | - ); |
|
| 103 | - |
|
| 104 | - $this->doctypes->register( |
|
| 105 | - 'XHTML 1.0 Strict', true, |
|
| 106 | - array_merge($common, $xml, $non_xml), |
|
| 107 | - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
| 108 | - array(), |
|
| 109 | - '-//W3C//DTD XHTML 1.0 Strict//EN', |
|
| 110 | - 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' |
|
| 111 | - ); |
|
| 112 | - |
|
| 113 | - $this->doctypes->register( |
|
| 114 | - 'XHTML 1.1', true, |
|
| 115 | - // Iframe is a real XHTML 1.1 module, despite being |
|
| 116 | - // "transitional"! |
|
| 117 | - array_merge($common, $xml, array('Ruby', 'Iframe')), |
|
| 118 | - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 |
|
| 119 | - array(), |
|
| 120 | - '-//W3C//DTD XHTML 1.1//EN', |
|
| 121 | - 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' |
|
| 122 | - ); |
|
| 123 | - |
|
| 124 | - } |
|
| 125 | - |
|
| 126 | - /** |
|
| 127 | - * Registers a module to the recognized module list, useful for |
|
| 128 | - * overloading pre-existing modules. |
|
| 129 | - * @param $module Mixed: string module name, with or without |
|
| 130 | - * HTMLPurifier_HTMLModule prefix, or instance of |
|
| 131 | - * subclass of HTMLPurifier_HTMLModule. |
|
| 132 | - * @param $overload Boolean whether or not to overload previous modules. |
|
| 133 | - * If this is not set, and you do overload a module, |
|
| 134 | - * HTML Purifier will complain with a warning. |
|
| 135 | - * @note This function will not call autoload, you must instantiate |
|
| 136 | - * (and thus invoke) autoload outside the method. |
|
| 137 | - * @note If a string is passed as a module name, different variants |
|
| 138 | - * will be tested in this order: |
|
| 139 | - * - Check for HTMLPurifier_HTMLModule_$name |
|
| 140 | - * - Check all prefixes with $name in order they were added |
|
| 141 | - * - Check for literal object name |
|
| 142 | - * - Throw fatal error |
|
| 143 | - * If your object name collides with an internal class, specify |
|
| 144 | - * your module manually. All modules must have been included |
|
| 145 | - * externally: registerModule will not perform inclusions for you! |
|
| 146 | - */ |
|
| 147 | - public function registerModule($module, $overload = false) { |
|
| 148 | - if (is_string($module)) { |
|
| 149 | - // attempt to load the module |
|
| 150 | - $original_module = $module; |
|
| 151 | - $ok = false; |
|
| 152 | - foreach ($this->prefixes as $prefix) { |
|
| 153 | - $module = $prefix . $original_module; |
|
| 154 | - if (class_exists($module)) { |
|
| 155 | - $ok = true; |
|
| 156 | - break; |
|
| 157 | - } |
|
| 158 | - } |
|
| 159 | - if (!$ok) { |
|
| 160 | - $module = $original_module; |
|
| 161 | - if (!class_exists($module)) { |
|
| 162 | - trigger_error($original_module . ' module does not exist', |
|
| 163 | - E_USER_ERROR); |
|
| 164 | - return; |
|
| 165 | - } |
|
| 166 | - } |
|
| 167 | - $module = new $module(); |
|
| 168 | - } |
|
| 169 | - if (empty($module->name)) { |
|
| 170 | - trigger_error('Module instance of ' . get_class($module) . ' must have name'); |
|
| 171 | - return; |
|
| 172 | - } |
|
| 173 | - if (!$overload && isset($this->registeredModules[$module->name])) { |
|
| 174 | - trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); |
|
| 175 | - } |
|
| 176 | - $this->registeredModules[$module->name] = $module; |
|
| 177 | - } |
|
| 178 | - |
|
| 179 | - /** |
|
| 180 | - * Adds a module to the current doctype by first registering it, |
|
| 181 | - * and then tacking it on to the active doctype |
|
| 182 | - */ |
|
| 183 | - public function addModule($module) { |
|
| 184 | - $this->registerModule($module); |
|
| 185 | - if (is_object($module)) $module = $module->name; |
|
| 186 | - $this->userModules[] = $module; |
|
| 187 | - } |
|
| 188 | - |
|
| 189 | - /** |
|
| 190 | - * Adds a class prefix that registerModule() will use to resolve a |
|
| 191 | - * string name to a concrete class |
|
| 192 | - */ |
|
| 193 | - public function addPrefix($prefix) { |
|
| 194 | - $this->prefixes[] = $prefix; |
|
| 195 | - } |
|
| 196 | - |
|
| 197 | - /** |
|
| 198 | - * Performs processing on modules, after being called you may |
|
| 199 | - * use getElement() and getElements() |
|
| 200 | - * @param $config Instance of HTMLPurifier_Config |
|
| 201 | - */ |
|
| 202 | - public function setup($config) { |
|
| 203 | - |
|
| 204 | - $this->trusted = $config->get('HTML.Trusted'); |
|
| 205 | - |
|
| 206 | - // generate |
|
| 207 | - $this->doctype = $this->doctypes->make($config); |
|
| 208 | - $modules = $this->doctype->modules; |
|
| 209 | - |
|
| 210 | - // take out the default modules that aren't allowed |
|
| 211 | - $lookup = $config->get('HTML.AllowedModules'); |
|
| 212 | - $special_cases = $config->get('HTML.CoreModules'); |
|
| 213 | - |
|
| 214 | - if (is_array($lookup)) { |
|
| 215 | - foreach ($modules as $k => $m) { |
|
| 216 | - if (isset($special_cases[$m])) continue; |
|
| 217 | - if (!isset($lookup[$m])) unset($modules[$k]); |
|
| 218 | - } |
|
| 219 | - } |
|
| 220 | - |
|
| 221 | - // custom modules |
|
| 222 | - if ($config->get('HTML.Proprietary')) { |
|
| 223 | - $modules[] = 'Proprietary'; |
|
| 224 | - } |
|
| 225 | - if ($config->get('HTML.SafeObject')) { |
|
| 226 | - $modules[] = 'SafeObject'; |
|
| 227 | - } |
|
| 228 | - if ($config->get('HTML.SafeEmbed')) { |
|
| 229 | - $modules[] = 'SafeEmbed'; |
|
| 230 | - } |
|
| 231 | - if ($config->get('HTML.Nofollow')) { |
|
| 232 | - $modules[] = 'Nofollow'; |
|
| 233 | - } |
|
| 234 | - if ($config->get('HTML.TargetBlank')) { |
|
| 235 | - $modules[] = 'TargetBlank'; |
|
| 236 | - } |
|
| 237 | - |
|
| 238 | - // merge in custom modules |
|
| 239 | - $modules = array_merge($modules, $this->userModules); |
|
| 240 | - |
|
| 241 | - foreach ($modules as $module) { |
|
| 242 | - $this->processModule($module); |
|
| 243 | - $this->modules[$module]->setup($config); |
|
| 244 | - } |
|
| 245 | - |
|
| 246 | - foreach ($this->doctype->tidyModules as $module) { |
|
| 247 | - $this->processModule($module); |
|
| 248 | - $this->modules[$module]->setup($config); |
|
| 249 | - } |
|
| 250 | - |
|
| 251 | - // prepare any injectors |
|
| 252 | - foreach ($this->modules as $module) { |
|
| 253 | - $n = array(); |
|
| 254 | - foreach ($module->info_injector as $i => $injector) { |
|
| 255 | - if (!is_object($injector)) { |
|
| 256 | - $class = "HTMLPurifier_Injector_$injector"; |
|
| 257 | - $injector = new $class; |
|
| 258 | - } |
|
| 259 | - $n[$injector->name] = $injector; |
|
| 260 | - } |
|
| 261 | - $module->info_injector = $n; |
|
| 262 | - } |
|
| 263 | - |
|
| 264 | - // setup lookup table based on all valid modules |
|
| 265 | - foreach ($this->modules as $module) { |
|
| 266 | - foreach ($module->info as $name => $def) { |
|
| 267 | - if (!isset($this->elementLookup[$name])) { |
|
| 268 | - $this->elementLookup[$name] = array(); |
|
| 269 | - } |
|
| 270 | - $this->elementLookup[$name][] = $module->name; |
|
| 271 | - } |
|
| 272 | - } |
|
| 273 | - |
|
| 274 | - // note the different choice |
|
| 275 | - $this->contentSets = new HTMLPurifier_ContentSets( |
|
| 276 | - // content set assembly deals with all possible modules, |
|
| 277 | - // not just ones deemed to be "safe" |
|
| 278 | - $this->modules |
|
| 279 | - ); |
|
| 280 | - $this->attrCollections = new HTMLPurifier_AttrCollections( |
|
| 281 | - $this->attrTypes, |
|
| 282 | - // there is no way to directly disable a global attribute, |
|
| 283 | - // but using AllowedAttributes or simply not including |
|
| 284 | - // the module in your custom doctype should be sufficient |
|
| 285 | - $this->modules |
|
| 286 | - ); |
|
| 287 | - } |
|
| 288 | - |
|
| 289 | - /** |
|
| 290 | - * Takes a module and adds it to the active module collection, |
|
| 291 | - * registering it if necessary. |
|
| 292 | - */ |
|
| 293 | - public function processModule($module) { |
|
| 294 | - if (!isset($this->registeredModules[$module]) || is_object($module)) { |
|
| 295 | - $this->registerModule($module); |
|
| 296 | - } |
|
| 297 | - $this->modules[$module] = $this->registeredModules[$module]; |
|
| 298 | - } |
|
| 299 | - |
|
| 300 | - /** |
|
| 301 | - * Retrieves merged element definitions. |
|
| 302 | - * @return Array of HTMLPurifier_ElementDef |
|
| 303 | - */ |
|
| 304 | - public function getElements() { |
|
| 305 | - |
|
| 306 | - $elements = array(); |
|
| 307 | - foreach ($this->modules as $module) { |
|
| 308 | - if (!$this->trusted && !$module->safe) continue; |
|
| 309 | - foreach ($module->info as $name => $v) { |
|
| 310 | - if (isset($elements[$name])) continue; |
|
| 311 | - $elements[$name] = $this->getElement($name); |
|
| 312 | - } |
|
| 313 | - } |
|
| 314 | - |
|
| 315 | - // remove dud elements, this happens when an element that |
|
| 316 | - // appeared to be safe actually wasn't |
|
| 317 | - foreach ($elements as $n => $v) { |
|
| 318 | - if ($v === false) unset($elements[$n]); |
|
| 319 | - } |
|
| 320 | - |
|
| 321 | - return $elements; |
|
| 322 | - |
|
| 323 | - } |
|
| 324 | - |
|
| 325 | - /** |
|
| 326 | - * Retrieves a single merged element definition |
|
| 327 | - * @param $name Name of element |
|
| 328 | - * @param $trusted Boolean trusted overriding parameter: set to true |
|
| 329 | - * if you want the full version of an element |
|
| 330 | - * @return Merged HTMLPurifier_ElementDef |
|
| 331 | - * @note You may notice that modules are getting iterated over twice (once |
|
| 332 | - * in getElements() and once here). This |
|
| 333 | - * is because |
|
| 334 | - */ |
|
| 335 | - public function getElement($name, $trusted = null) { |
|
| 336 | - |
|
| 337 | - if (!isset($this->elementLookup[$name])) { |
|
| 338 | - return false; |
|
| 339 | - } |
|
| 340 | - |
|
| 341 | - // setup global state variables |
|
| 342 | - $def = false; |
|
| 343 | - if ($trusted === null) $trusted = $this->trusted; |
|
| 344 | - |
|
| 345 | - // iterate through each module that has registered itself to this |
|
| 346 | - // element |
|
| 347 | - foreach($this->elementLookup[$name] as $module_name) { |
|
| 348 | - |
|
| 349 | - $module = $this->modules[$module_name]; |
|
| 350 | - |
|
| 351 | - // refuse to create/merge from a module that is deemed unsafe-- |
|
| 352 | - // pretend the module doesn't exist--when trusted mode is not on. |
|
| 353 | - if (!$trusted && !$module->safe) { |
|
| 354 | - continue; |
|
| 355 | - } |
|
| 356 | - |
|
| 357 | - // clone is used because, ideally speaking, the original |
|
| 358 | - // definition should not be modified. Usually, this will |
|
| 359 | - // make no difference, but for consistency's sake |
|
| 360 | - $new_def = clone $module->info[$name]; |
|
| 361 | - |
|
| 362 | - if (!$def && $new_def->standalone) { |
|
| 363 | - $def = $new_def; |
|
| 364 | - } elseif ($def) { |
|
| 365 | - // This will occur even if $new_def is standalone. In practice, |
|
| 366 | - // this will usually result in a full replacement. |
|
| 367 | - $def->mergeIn($new_def); |
|
| 368 | - } else { |
|
| 369 | - // :TODO: |
|
| 370 | - // non-standalone definitions that don't have a standalone |
|
| 371 | - // to merge into could be deferred to the end |
|
| 372 | - // HOWEVER, it is perfectly valid for a non-standalone |
|
| 373 | - // definition to lack a standalone definition, even |
|
| 374 | - // after all processing: this allows us to safely |
|
| 375 | - // specify extra attributes for elements that may not be |
|
| 376 | - // enabled all in one place. In particular, this might |
|
| 377 | - // be the case for trusted elements. WARNING: care must |
|
| 378 | - // be taken that the /extra/ definitions are all safe. |
|
| 379 | - continue; |
|
| 380 | - } |
|
| 381 | - |
|
| 382 | - // attribute value expansions |
|
| 383 | - $this->attrCollections->performInclusions($def->attr); |
|
| 384 | - $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); |
|
| 385 | - |
|
| 386 | - // descendants_are_inline, for ChildDef_Chameleon |
|
| 387 | - if (is_string($def->content_model) && |
|
| 388 | - strpos($def->content_model, 'Inline') !== false) { |
|
| 389 | - if ($name != 'del' && $name != 'ins') { |
|
| 390 | - // this is for you, ins/del |
|
| 391 | - $def->descendants_are_inline = true; |
|
| 392 | - } |
|
| 393 | - } |
|
| 394 | - |
|
| 395 | - $this->contentSets->generateChildDef($def, $module); |
|
| 396 | - } |
|
| 397 | - |
|
| 398 | - // This can occur if there is a blank definition, but no base to |
|
| 399 | - // mix it in with |
|
| 400 | - if (!$def) return false; |
|
| 401 | - |
|
| 402 | - // add information on required attributes |
|
| 403 | - foreach ($def->attr as $attr_name => $attr_def) { |
|
| 404 | - if ($attr_def->required) { |
|
| 405 | - $def->required_attr[] = $attr_name; |
|
| 406 | - } |
|
| 407 | - } |
|
| 408 | - |
|
| 409 | - return $def; |
|
| 410 | - |
|
| 411 | - } |
|
| 6 | + /** |
|
| 7 | + * Instance of HTMLPurifier_DoctypeRegistry |
|
| 8 | + */ |
|
| 9 | + public $doctypes; |
|
| 10 | + |
|
| 11 | + /** |
|
| 12 | + * Instance of current doctype |
|
| 13 | + */ |
|
| 14 | + public $doctype; |
|
| 15 | + |
|
| 16 | + /** |
|
| 17 | + * Instance of HTMLPurifier_AttrTypes |
|
| 18 | + */ |
|
| 19 | + public $attrTypes; |
|
| 20 | + |
|
| 21 | + /** |
|
| 22 | + * Active instances of modules for the specified doctype are |
|
| 23 | + * indexed, by name, in this array. |
|
| 24 | + */ |
|
| 25 | + public $modules = array(); |
|
| 26 | + |
|
| 27 | + /** |
|
| 28 | + * Array of recognized HTMLPurifier_Module instances, indexed by |
|
| 29 | + * module's name property. This array is usually lazy loaded, but a |
|
| 30 | + * user can overload a module by pre-emptively registering it. |
|
| 31 | + */ |
|
| 32 | + public $registeredModules = array(); |
|
| 33 | + |
|
| 34 | + /** |
|
| 35 | + * List of extra modules that were added by the user using addModule(). |
|
| 36 | + * These get unconditionally merged into the current doctype, whatever |
|
| 37 | + * it may be. |
|
| 38 | + */ |
|
| 39 | + public $userModules = array(); |
|
| 40 | + |
|
| 41 | + /** |
|
| 42 | + * Associative array of element name to list of modules that have |
|
| 43 | + * definitions for the element; this array is dynamically filled. |
|
| 44 | + */ |
|
| 45 | + public $elementLookup = array(); |
|
| 46 | + |
|
| 47 | + /** List of prefixes we should use for registering small names */ |
|
| 48 | + public $prefixes = array('HTMLPurifier_HTMLModule_'); |
|
| 49 | + |
|
| 50 | + public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
| 51 | + public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ |
|
| 52 | + |
|
| 53 | + /** If set to true, unsafe elements and attributes will be allowed */ |
|
| 54 | + public $trusted = false; |
|
| 55 | + |
|
| 56 | + public function __construct() { |
|
| 57 | + |
|
| 58 | + // editable internal objects |
|
| 59 | + $this->attrTypes = new HTMLPurifier_AttrTypes(); |
|
| 60 | + $this->doctypes = new HTMLPurifier_DoctypeRegistry(); |
|
| 61 | + |
|
| 62 | + // setup basic modules |
|
| 63 | + $common = array( |
|
| 64 | + 'CommonAttributes', 'Text', 'Hypertext', 'List', |
|
| 65 | + 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', |
|
| 66 | + 'StyleAttribute', |
|
| 67 | + // Unsafe: |
|
| 68 | + 'Scripting', 'Object', 'Forms', |
|
| 69 | + // Sorta legacy, but present in strict: |
|
| 70 | + 'Name', |
|
| 71 | + ); |
|
| 72 | + $transitional = array('Legacy', 'Target', 'Iframe'); |
|
| 73 | + $xml = array('XMLCommonAttributes'); |
|
| 74 | + $non_xml = array('NonXMLCommonAttributes'); |
|
| 75 | + |
|
| 76 | + // setup basic doctypes |
|
| 77 | + $this->doctypes->register( |
|
| 78 | + 'HTML 4.01 Transitional', false, |
|
| 79 | + array_merge($common, $transitional, $non_xml), |
|
| 80 | + array('Tidy_Transitional', 'Tidy_Proprietary'), |
|
| 81 | + array(), |
|
| 82 | + '-//W3C//DTD HTML 4.01 Transitional//EN', |
|
| 83 | + 'http://www.w3.org/TR/html4/loose.dtd' |
|
| 84 | + ); |
|
| 85 | + |
|
| 86 | + $this->doctypes->register( |
|
| 87 | + 'HTML 4.01 Strict', false, |
|
| 88 | + array_merge($common, $non_xml), |
|
| 89 | + array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
| 90 | + array(), |
|
| 91 | + '-//W3C//DTD HTML 4.01//EN', |
|
| 92 | + 'http://www.w3.org/TR/html4/strict.dtd' |
|
| 93 | + ); |
|
| 94 | + |
|
| 95 | + $this->doctypes->register( |
|
| 96 | + 'XHTML 1.0 Transitional', true, |
|
| 97 | + array_merge($common, $transitional, $xml, $non_xml), |
|
| 98 | + array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), |
|
| 99 | + array(), |
|
| 100 | + '-//W3C//DTD XHTML 1.0 Transitional//EN', |
|
| 101 | + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' |
|
| 102 | + ); |
|
| 103 | + |
|
| 104 | + $this->doctypes->register( |
|
| 105 | + 'XHTML 1.0 Strict', true, |
|
| 106 | + array_merge($common, $xml, $non_xml), |
|
| 107 | + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
| 108 | + array(), |
|
| 109 | + '-//W3C//DTD XHTML 1.0 Strict//EN', |
|
| 110 | + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' |
|
| 111 | + ); |
|
| 112 | + |
|
| 113 | + $this->doctypes->register( |
|
| 114 | + 'XHTML 1.1', true, |
|
| 115 | + // Iframe is a real XHTML 1.1 module, despite being |
|
| 116 | + // "transitional"! |
|
| 117 | + array_merge($common, $xml, array('Ruby', 'Iframe')), |
|
| 118 | + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 |
|
| 119 | + array(), |
|
| 120 | + '-//W3C//DTD XHTML 1.1//EN', |
|
| 121 | + 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' |
|
| 122 | + ); |
|
| 123 | + |
|
| 124 | + } |
|
| 125 | + |
|
| 126 | + /** |
|
| 127 | + * Registers a module to the recognized module list, useful for |
|
| 128 | + * overloading pre-existing modules. |
|
| 129 | + * @param $module Mixed: string module name, with or without |
|
| 130 | + * HTMLPurifier_HTMLModule prefix, or instance of |
|
| 131 | + * subclass of HTMLPurifier_HTMLModule. |
|
| 132 | + * @param $overload Boolean whether or not to overload previous modules. |
|
| 133 | + * If this is not set, and you do overload a module, |
|
| 134 | + * HTML Purifier will complain with a warning. |
|
| 135 | + * @note This function will not call autoload; you must instantiate |
|
| 136 | + * (and thus invoke) autoload outside the method. |
|
| 137 | + * @note If a string is passed as a module name, different variants |
|
| 138 | + * will be tested in this order: |
|
| 139 | + * - Check for HTMLPurifier_HTMLModule_$name |
|
| 140 | + * - Check all prefixes with $name in order they were added |
|
| 141 | + * - Check for literal object name |
|
| 142 | + * - Throw fatal error |
|
| 143 | + * If your object name collides with an internal class, specify |
|
| 144 | + * your module manually. All modules must have been included |
|
| 145 | + * externally: registerModule will not perform inclusions for you! |
|
| 146 | + */ |
|
| 147 | + public function registerModule($module, $overload = false) { |
|
| 148 | + if (is_string($module)) { |
|
| 149 | + // attempt to load the module |
|
| 150 | + $original_module = $module; |
|
| 151 | + $ok = false; |
|
| 152 | + foreach ($this->prefixes as $prefix) { |
|
| 153 | + $module = $prefix . $original_module; |
|
| 154 | + if (class_exists($module)) { |
|
| 155 | + $ok = true; |
|
| 156 | + break; |
|
| 157 | + } |
|
| 158 | + } |
|
| 159 | + if (!$ok) { |
|
| 160 | + $module = $original_module; |
|
| 161 | + if (!class_exists($module)) { |
|
| 162 | + trigger_error($original_module . ' module does not exist', |
|
| 163 | + E_USER_ERROR); |
|
| 164 | + return; |
|
| 165 | + } |
|
| 166 | + } |
|
| 167 | + $module = new $module(); |
|
| 168 | + } |
|
| 169 | + if (empty($module->name)) { |
|
| 170 | + trigger_error('Module instance of ' . get_class($module) . ' must have name'); |
|
| 171 | + return; |
|
| 172 | + } |
|
| 173 | + if (!$overload && isset($this->registeredModules[$module->name])) { |
|
| 174 | + trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); |
|
| 175 | + } |
|
| 176 | + $this->registeredModules[$module->name] = $module; |
|
| 177 | + } |
|
| 178 | + |
|
| 179 | + /** |
|
| 180 | + * Adds a module to the current doctype by first registering it, |
|
| 181 | + * and then tacking it on to the active doctype |
|
| 182 | + */ |
|
| 183 | + public function addModule($module) { |
|
| 184 | + $this->registerModule($module); |
|
| 185 | + if (is_object($module)) $module = $module->name; |
|
| 186 | + $this->userModules[] = $module; |
|
| 187 | + } |
|
| 188 | + |
|
| 189 | + /** |
|
| 190 | + * Adds a class prefix that registerModule() will use to resolve a |
|
| 191 | + * string name to a concrete class |
|
| 192 | + */ |
|
| 193 | + public function addPrefix($prefix) { |
|
| 194 | + $this->prefixes[] = $prefix; |
|
| 195 | + } |
|
| 196 | + |
|
| 197 | + /** |
|
| 198 | + * Performs processing on modules, after being called you may |
|
| 199 | + * use getElement() and getElements() |
|
| 200 | + * @param $config Instance of HTMLPurifier_Config |
|
| 201 | + */ |
|
| 202 | + public function setup($config) { |
|
| 203 | + |
|
| 204 | + $this->trusted = $config->get('HTML.Trusted'); |
|
| 205 | + |
|
| 206 | + // generate |
|
| 207 | + $this->doctype = $this->doctypes->make($config); |
|
| 208 | + $modules = $this->doctype->modules; |
|
| 209 | + |
|
| 210 | + // take out the default modules that aren't allowed |
|
| 211 | + $lookup = $config->get('HTML.AllowedModules'); |
|
| 212 | + $special_cases = $config->get('HTML.CoreModules'); |
|
| 213 | + |
|
| 214 | + if (is_array($lookup)) { |
|
| 215 | + foreach ($modules as $k => $m) { |
|
| 216 | + if (isset($special_cases[$m])) continue; |
|
| 217 | + if (!isset($lookup[$m])) unset($modules[$k]); |
|
| 218 | + } |
|
| 219 | + } |
|
| 220 | + |
|
| 221 | + // custom modules |
|
| 222 | + if ($config->get('HTML.Proprietary')) { |
|
| 223 | + $modules[] = 'Proprietary'; |
|
| 224 | + } |
|
| 225 | + if ($config->get('HTML.SafeObject')) { |
|
| 226 | + $modules[] = 'SafeObject'; |
|
| 227 | + } |
|
| 228 | + if ($config->get('HTML.SafeEmbed')) { |
|
| 229 | + $modules[] = 'SafeEmbed'; |
|
| 230 | + } |
|
| 231 | + if ($config->get('HTML.Nofollow')) { |
|
| 232 | + $modules[] = 'Nofollow'; |
|
| 233 | + } |
|
| 234 | + if ($config->get('HTML.TargetBlank')) { |
|
| 235 | + $modules[] = 'TargetBlank'; |
|
| 236 | + } |
|
| 237 | + |
|
| 238 | + // merge in custom modules |
|
| 239 | + $modules = array_merge($modules, $this->userModules); |
|
| 240 | + |
|
| 241 | + foreach ($modules as $module) { |
|
| 242 | + $this->processModule($module); |
|
| 243 | + $this->modules[$module]->setup($config); |
|
| 244 | + } |
|
| 245 | + |
|
| 246 | + foreach ($this->doctype->tidyModules as $module) { |
|
| 247 | + $this->processModule($module); |
|
| 248 | + $this->modules[$module]->setup($config); |
|
| 249 | + } |
|
| 250 | + |
|
| 251 | + // prepare any injectors |
|
| 252 | + foreach ($this->modules as $module) { |
|
| 253 | + $n = array(); |
|
| 254 | + foreach ($module->info_injector as $i => $injector) { |
|
| 255 | + if (!is_object($injector)) { |
|
| 256 | + $class = "HTMLPurifier_Injector_$injector"; |
|
| 257 | + $injector = new $class; |
|
| 258 | + } |
|
| 259 | + $n[$injector->name] = $injector; |
|
| 260 | + } |
|
| 261 | + $module->info_injector = $n; |
|
| 262 | + } |
|
| 263 | + |
|
| 264 | + // setup lookup table based on all valid modules |
|
| 265 | + foreach ($this->modules as $module) { |
|
| 266 | + foreach ($module->info as $name => $def) { |
|
| 267 | + if (!isset($this->elementLookup[$name])) { |
|
| 268 | + $this->elementLookup[$name] = array(); |
|
| 269 | + } |
|
| 270 | + $this->elementLookup[$name][] = $module->name; |
|
| 271 | + } |
|
| 272 | + } |
|
| 273 | + |
|
| 274 | + // note the different choice |
|
| 275 | + $this->contentSets = new HTMLPurifier_ContentSets( |
|
| 276 | + // content set assembly deals with all possible modules, |
|
| 277 | + // not just ones deemed to be "safe" |
|
| 278 | + $this->modules |
|
| 279 | + ); |
|
| 280 | + $this->attrCollections = new HTMLPurifier_AttrCollections( |
|
| 281 | + $this->attrTypes, |
|
| 282 | + // there is no way to directly disable a global attribute, |
|
| 283 | + // but using AllowedAttributes or simply not including |
|
| 284 | + // the module in your custom doctype should be sufficient |
|
| 285 | + $this->modules |
|
| 286 | + ); |
|
| 287 | + } |
|
| 288 | + |
|
| 289 | + /** |
|
| 290 | + * Takes a module and adds it to the active module collection, |
|
| 291 | + * registering it if necessary. |
|
| 292 | + */ |
|
| 293 | + public function processModule($module) { |
|
| 294 | + if (!isset($this->registeredModules[$module]) || is_object($module)) { |
|
| 295 | + $this->registerModule($module); |
|
| 296 | + } |
|
| 297 | + $this->modules[$module] = $this->registeredModules[$module]; |
|
| 298 | + } |
|
| 299 | + |
|
| 300 | + /** |
|
| 301 | + * Retrieves merged element definitions. |
|
| 302 | + * @return Array of HTMLPurifier_ElementDef |
|
| 303 | + */ |
|
| 304 | + public function getElements() { |
|
| 305 | + |
|
| 306 | + $elements = array(); |
|
| 307 | + foreach ($this->modules as $module) { |
|
| 308 | + if (!$this->trusted && !$module->safe) continue; |
|
| 309 | + foreach ($module->info as $name => $v) { |
|
| 310 | + if (isset($elements[$name])) continue; |
|
| 311 | + $elements[$name] = $this->getElement($name); |
|
| 312 | + } |
|
| 313 | + } |
|
| 314 | + |
|
| 315 | + // remove dud elements, this happens when an element that |
|
| 316 | + // appeared to be safe actually wasn't |
|
| 317 | + foreach ($elements as $n => $v) { |
|
| 318 | + if ($v === false) unset($elements[$n]); |
|
| 319 | + } |
|
| 320 | + |
|
| 321 | + return $elements; |
|
| 322 | + |
|
| 323 | + } |
|
| 324 | + |
|
| 325 | + /** |
|
| 326 | + * Retrieves a single merged element definition |
|
| 327 | + * @param $name Name of element |
|
| 328 | + * @param $trusted Boolean trusted overriding parameter: set to true |
|
| 329 | + * if you want the full version of an element |
|
| 330 | + * @return Merged HTMLPurifier_ElementDef |
|
| 331 | + * @note You may notice that modules are getting iterated over twice (once |
|
| 332 | + * in getElements() and once here). This |
|
| 333 | + * is because |
|
| 334 | + */ |
|
| 335 | + public function getElement($name, $trusted = null) { |
|
| 336 | + |
|
| 337 | + if (!isset($this->elementLookup[$name])) { |
|
| 338 | + return false; |
|
| 339 | + } |
|
| 340 | + |
|
| 341 | + // setup global state variables |
|
| 342 | + $def = false; |
|
| 343 | + if ($trusted === null) $trusted = $this->trusted; |
|
| 344 | + |
|
| 345 | + // iterate through each module that has registered itself to this |
|
| 346 | + // element |
|
| 347 | + foreach($this->elementLookup[$name] as $module_name) { |
|
| 348 | + |
|
| 349 | + $module = $this->modules[$module_name]; |
|
| 350 | + |
|
| 351 | + // refuse to create/merge from a module that is deemed unsafe-- |
|
| 352 | + // pretend the module doesn't exist--when trusted mode is not on. |
|
| 353 | + if (!$trusted && !$module->safe) { |
|
| 354 | + continue; |
|
| 355 | + } |
|
| 356 | + |
|
| 357 | + // clone is used because, ideally speaking, the original |
|
| 358 | + // definition should not be modified. Usually, this will |
|
| 359 | + // make no difference, but for consistency's sake |
|
| 360 | + $new_def = clone $module->info[$name]; |
|
| 361 | + |
|
| 362 | + if (!$def && $new_def->standalone) { |
|
| 363 | + $def = $new_def; |
|
| 364 | + } elseif ($def) { |
|
| 365 | + // This will occur even if $new_def is standalone. In practice, |
|
| 366 | + // this will usually result in a full replacement. |
|
| 367 | + $def->mergeIn($new_def); |
|
| 368 | + } else { |
|
| 369 | + // :TODO: |
|
| 370 | + // non-standalone definitions that don't have a standalone |
|
| 371 | + // to merge into could be deferred to the end |
|
| 372 | + // HOWEVER, it is perfectly valid for a non-standalone |
|
| 373 | + // definition to lack a standalone definition, even |
|
| 374 | + // after all processing: this allows us to safely |
|
| 375 | + // specify extra attributes for elements that may not be |
|
| 376 | + // enabled all in one place. In particular, this might |
|
| 377 | + // be the case for trusted elements. WARNING: care must |
|
| 378 | + // be taken that the /extra/ definitions are all safe. |
|
| 379 | + continue; |
|
| 380 | + } |
|
| 381 | + |
|
| 382 | + // attribute value expansions |
|
| 383 | + $this->attrCollections->performInclusions($def->attr); |
|
| 384 | + $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); |
|
| 385 | + |
|
| 386 | + // descendants_are_inline, for ChildDef_Chameleon |
|
| 387 | + if (is_string($def->content_model) && |
|
| 388 | + strpos($def->content_model, 'Inline') !== false) { |
|
| 389 | + if ($name != 'del' && $name != 'ins') { |
|
| 390 | + // this is for you, ins/del |
|
| 391 | + $def->descendants_are_inline = true; |
|
| 392 | + } |
|
| 393 | + } |
|
| 394 | + |
|
| 395 | + $this->contentSets->generateChildDef($def, $module); |
|
| 396 | + } |
|
| 397 | + |
|
| 398 | + // This can occur if there is a blank definition, but no base to |
|
| 399 | + // mix it in with |
|
| 400 | + if (!$def) return false; |
|
| 401 | + |
|
| 402 | + // add information on required attributes |
|
| 403 | + foreach ($def->attr as $attr_name => $attr_def) { |
|
| 404 | + if ($attr_def->required) { |
|
| 405 | + $def->required_attr[] = $attr_name; |
|
| 406 | + } |
|
| 407 | + } |
|
| 408 | + |
|
| 409 | + return $def; |
|
| 410 | + |
|
| 411 | + } |
|
| 412 | 412 | |
| 413 | 413 | } |
| 414 | 414 | |
@@ -182,7 +182,9 @@ discard block |
||
| 182 | 182 | */ |
| 183 | 183 | public function addModule($module) { |
| 184 | 184 | $this->registerModule($module); |
| 185 | - if (is_object($module)) $module = $module->name; |
|
| 185 | + if (is_object($module)) { |
|
| 186 | + $module = $module->name; |
|
| 187 | + } |
|
| 186 | 188 | $this->userModules[] = $module; |
| 187 | 189 | } |
| 188 | 190 | |
@@ -213,8 +215,12 @@ discard block |
||
| 213 | 215 | |
| 214 | 216 | if (is_array($lookup)) { |
| 215 | 217 | foreach ($modules as $k => $m) { |
| 216 | - if (isset($special_cases[$m])) continue; |
|
| 217 | - if (!isset($lookup[$m])) unset($modules[$k]); |
|
| 218 | + if (isset($special_cases[$m])) { |
|
| 219 | + continue; |
|
| 220 | + } |
|
| 221 | + if (!isset($lookup[$m])) { |
|
| 222 | + unset($modules[$k]); |
|
| 223 | + } |
|
| 218 | 224 | } |
| 219 | 225 | } |
| 220 | 226 | |
@@ -305,9 +311,13 @@ discard block |
||
| 305 | 311 | |
| 306 | 312 | $elements = array(); |
| 307 | 313 | foreach ($this->modules as $module) { |
| 308 | - if (!$this->trusted && !$module->safe) continue; |
|
| 314 | + if (!$this->trusted && !$module->safe) { |
|
| 315 | + continue; |
|
| 316 | + } |
|
| 309 | 317 | foreach ($module->info as $name => $v) { |
| 310 | - if (isset($elements[$name])) continue; |
|
| 318 | + if (isset($elements[$name])) { |
|
| 319 | + continue; |
|
| 320 | + } |
|
| 311 | 321 | $elements[$name] = $this->getElement($name); |
| 312 | 322 | } |
| 313 | 323 | } |
@@ -315,7 +325,9 @@ discard block |
||
| 315 | 325 | // remove dud elements, this happens when an element that |
| 316 | 326 | // appeared to be safe actually wasn't |
| 317 | 327 | foreach ($elements as $n => $v) { |
| 318 | - if ($v === false) unset($elements[$n]); |
|
| 328 | + if ($v === false) { |
|
| 329 | + unset($elements[$n]); |
|
| 330 | + } |
|
| 319 | 331 | } |
| 320 | 332 | |
| 321 | 333 | return $elements; |
@@ -340,7 +352,9 @@ discard block |
||
| 340 | 352 | |
| 341 | 353 | // setup global state variables |
| 342 | 354 | $def = false; |
| 343 | - if ($trusted === null) $trusted = $this->trusted; |
|
| 355 | + if ($trusted === null) { |
|
| 356 | + $trusted = $this->trusted; |
|
| 357 | + } |
|
| 344 | 358 | |
| 345 | 359 | // iterate through each module that has registered itself to this |
| 346 | 360 | // element |
@@ -397,7 +411,9 @@ discard block |
||
| 397 | 411 | |
| 398 | 412 | // This can occur if there is a blank definition, but no base to |
| 399 | 413 | // mix it in with |
| 400 | - if (!$def) return false; |
|
| 414 | + if (!$def) { |
|
| 415 | + return false; |
|
| 416 | + } |
|
| 401 | 417 | |
| 402 | 418 | // add information on required attributes |
| 403 | 419 | foreach ($def->attr as $attr_name => $attr_def) { |
@@ -47,7 +47,7 @@ discard block |
||
| 47 | 47 | /** List of prefixes we should use for registering small names */ |
| 48 | 48 | public $prefixes = array('HTMLPurifier_HTMLModule_'); |
| 49 | 49 | |
| 50 | - public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
| 50 | + public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
| 51 | 51 | public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ |
| 52 | 52 | |
| 53 | 53 | /** If set to true, unsafe elements and attributes will be allowed */ |
@@ -150,7 +150,7 @@ discard block |
||
| 150 | 150 | $original_module = $module; |
| 151 | 151 | $ok = false; |
| 152 | 152 | foreach ($this->prefixes as $prefix) { |
| 153 | - $module = $prefix . $original_module; |
|
| 153 | + $module = $prefix.$original_module; |
|
| 154 | 154 | if (class_exists($module)) { |
| 155 | 155 | $ok = true; |
| 156 | 156 | break; |
@@ -159,7 +159,7 @@ discard block |
||
| 159 | 159 | if (!$ok) { |
| 160 | 160 | $module = $original_module; |
| 161 | 161 | if (!class_exists($module)) { |
| 162 | - trigger_error($original_module . ' module does not exist', |
|
| 162 | + trigger_error($original_module.' module does not exist', |
|
| 163 | 163 | E_USER_ERROR); |
| 164 | 164 | return; |
| 165 | 165 | } |
@@ -167,11 +167,11 @@ discard block |
||
| 167 | 167 | $module = new $module(); |
| 168 | 168 | } |
| 169 | 169 | if (empty($module->name)) { |
| 170 | - trigger_error('Module instance of ' . get_class($module) . ' must have name'); |
|
| 170 | + trigger_error('Module instance of '.get_class($module).' must have name'); |
|
| 171 | 171 | return; |
| 172 | 172 | } |
| 173 | 173 | if (!$overload && isset($this->registeredModules[$module->name])) { |
| 174 | - trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); |
|
| 174 | + trigger_error('Overloading '.$module->name.' without explicit overload parameter', E_USER_WARNING); |
|
| 175 | 175 | } |
| 176 | 176 | $this->registeredModules[$module->name] = $module; |
| 177 | 177 | } |
@@ -344,7 +344,7 @@ discard block |
||
| 344 | 344 | |
| 345 | 345 | // iterate through each module that has registered itself to this |
| 346 | 346 | // element |
| 347 | - foreach($this->elementLookup[$name] as $module_name) { |
|
| 347 | + foreach ($this->elementLookup[$name] as $module_name) { |
|
| 348 | 348 | |
| 349 | 349 | $module = $this->modules[$module_name]; |
| 350 | 350 | |
@@ -19,7 +19,7 @@ |
||
| 19 | 19 | * Builds an IDAccumulator, also initializing the default blacklist |
| 20 | 20 | * @param $config Instance of HTMLPurifier_Config |
| 21 | 21 | * @param $context Instance of HTMLPurifier_Context |
| 22 | - * @return Fully initialized HTMLPurifier_IDAccumulator |
|
| 22 | + * @return HTMLPurifier_IDAccumulator initialized HTMLPurifier_IDAccumulator |
|
| 23 | 23 | */ |
| 24 | 24 | public static function build($config, $context) { |
| 25 | 25 | $id_accumulator = new HTMLPurifier_IDAccumulator(); |
@@ -9,44 +9,44 @@ |
||
| 9 | 9 | class HTMLPurifier_IDAccumulator |
| 10 | 10 | { |
| 11 | 11 | |
| 12 | - /** |
|
| 13 | - * Lookup table of IDs we've accumulated. |
|
| 14 | - * @public |
|
| 15 | - */ |
|
| 16 | - public $ids = array(); |
|
| 12 | + /** |
|
| 13 | + * Lookup table of IDs we've accumulated. |
|
| 14 | + * @public |
|
| 15 | + */ |
|
| 16 | + public $ids = array(); |
|
| 17 | 17 | |
| 18 | - /** |
|
| 19 | - * Builds an IDAccumulator, also initializing the default blacklist |
|
| 20 | - * @param $config Instance of HTMLPurifier_Config |
|
| 21 | - * @param $context Instance of HTMLPurifier_Context |
|
| 22 | - * @return Fully initialized HTMLPurifier_IDAccumulator |
|
| 23 | - */ |
|
| 24 | - public static function build($config, $context) { |
|
| 25 | - $id_accumulator = new HTMLPurifier_IDAccumulator(); |
|
| 26 | - $id_accumulator->load($config->get('Attr.IDBlacklist')); |
|
| 27 | - return $id_accumulator; |
|
| 28 | - } |
|
| 18 | + /** |
|
| 19 | + * Builds an IDAccumulator, also initializing the default blacklist |
|
| 20 | + * @param $config Instance of HTMLPurifier_Config |
|
| 21 | + * @param $context Instance of HTMLPurifier_Context |
|
| 22 | + * @return Fully initialized HTMLPurifier_IDAccumulator |
|
| 23 | + */ |
|
| 24 | + public static function build($config, $context) { |
|
| 25 | + $id_accumulator = new HTMLPurifier_IDAccumulator(); |
|
| 26 | + $id_accumulator->load($config->get('Attr.IDBlacklist')); |
|
| 27 | + return $id_accumulator; |
|
| 28 | + } |
|
| 29 | 29 | |
| 30 | - /** |
|
| 31 | - * Add an ID to the lookup table. |
|
| 32 | - * @param $id ID to be added. |
|
| 33 | - * @return Bool status, true if success, false if there's a dupe |
|
| 34 | - */ |
|
| 35 | - public function add($id) { |
|
| 36 | - if (isset($this->ids[$id])) return false; |
|
| 37 | - return $this->ids[$id] = true; |
|
| 38 | - } |
|
| 30 | + /** |
|
| 31 | + * Add an ID to the lookup table. |
|
| 32 | + * @param $id ID to be added. |
|
| 33 | + * @return Bool status, true if success, false if there's a dupe |
|
| 34 | + */ |
|
| 35 | + public function add($id) { |
|
| 36 | + if (isset($this->ids[$id])) return false; |
|
| 37 | + return $this->ids[$id] = true; |
|
| 38 | + } |
|
| 39 | 39 | |
| 40 | - /** |
|
| 41 | - * Load a list of IDs into the lookup table |
|
| 42 | - * @param $array_of_ids Array of IDs to load |
|
| 43 | - * @note This function doesn't care about duplicates |
|
| 44 | - */ |
|
| 45 | - public function load($array_of_ids) { |
|
| 46 | - foreach ($array_of_ids as $id) { |
|
| 47 | - $this->ids[$id] = true; |
|
| 48 | - } |
|
| 49 | - } |
|
| 40 | + /** |
|
| 41 | + * Load a list of IDs into the lookup table |
|
| 42 | + * @param $array_of_ids Array of IDs to load |
|
| 43 | + * @note This function doesn't care about duplicates |
|
| 44 | + */ |
|
| 45 | + public function load($array_of_ids) { |
|
| 46 | + foreach ($array_of_ids as $id) { |
|
| 47 | + $this->ids[$id] = true; |
|
| 48 | + } |
|
| 49 | + } |
|
| 50 | 50 | |
| 51 | 51 | } |
| 52 | 52 | |
@@ -33,7 +33,9 @@ |
||
| 33 | 33 | * @return Bool status, true if success, false if there's a dupe |
| 34 | 34 | */ |
| 35 | 35 | public function add($id) { |
| 36 | - if (isset($this->ids[$id])) return false; |
|
| 36 | + if (isset($this->ids[$id])) { |
|
| 37 | + return false; |
|
| 38 | + } |
|
| 37 | 39 | return $this->ids[$id] = true; |
| 38 | 40 | } |
| 39 | 41 | |
@@ -64,6 +64,7 @@ discard block |
||
| 64 | 64 | * result in infinite loops if not used carefully. |
| 65 | 65 | * @warning HTML Purifier will prevent you from fast-forwarding with this |
| 66 | 66 | * function. |
| 67 | + * @param integer $index |
|
| 67 | 68 | */ |
| 68 | 69 | public function rewind($index) { |
| 69 | 70 | $this->rewind = $index; |
@@ -123,8 +124,8 @@ discard block |
||
| 123 | 124 | |
| 124 | 125 | /** |
| 125 | 126 | * Tests if the context node allows a certain element |
| 126 | - * @param $name Name of element to test for |
|
| 127 | - * @return True if element is allowed, false if it is not |
|
| 127 | + * @param string $name Name of element to test for |
|
| 128 | + * @return boolean if element is allowed, false if it is not |
|
| 128 | 129 | */ |
| 129 | 130 | public function allowsElement($name) { |
| 130 | 131 | if (!empty($this->currentNesting)) { |
@@ -16,222 +16,222 @@ |
||
| 16 | 16 | abstract class HTMLPurifier_Injector |
| 17 | 17 | { |
| 18 | 18 | |
| 19 | - /** |
|
| 20 | - * Advisory name of injector, this is for friendly error messages |
|
| 21 | - */ |
|
| 22 | - public $name; |
|
| 23 | - |
|
| 24 | - /** |
|
| 25 | - * Instance of HTMLPurifier_HTMLDefinition |
|
| 26 | - */ |
|
| 27 | - protected $htmlDefinition; |
|
| 28 | - |
|
| 29 | - /** |
|
| 30 | - * Reference to CurrentNesting variable in Context. This is an array |
|
| 31 | - * list of tokens that we are currently "inside" |
|
| 32 | - */ |
|
| 33 | - protected $currentNesting; |
|
| 34 | - |
|
| 35 | - /** |
|
| 36 | - * Reference to InputTokens variable in Context. This is an array |
|
| 37 | - * list of the input tokens that are being processed. |
|
| 38 | - */ |
|
| 39 | - protected $inputTokens; |
|
| 40 | - |
|
| 41 | - /** |
|
| 42 | - * Reference to InputIndex variable in Context. This is an integer |
|
| 43 | - * array index for $this->inputTokens that indicates what token |
|
| 44 | - * is currently being processed. |
|
| 45 | - */ |
|
| 46 | - protected $inputIndex; |
|
| 47 | - |
|
| 48 | - /** |
|
| 49 | - * Array of elements and attributes this injector creates and therefore |
|
| 50 | - * need to be allowed by the definition. Takes form of |
|
| 51 | - * array('element' => array('attr', 'attr2'), 'element2') |
|
| 52 | - */ |
|
| 53 | - public $needed = array(); |
|
| 54 | - |
|
| 55 | - /** |
|
| 56 | - * Index of inputTokens to rewind to. |
|
| 57 | - */ |
|
| 58 | - protected $rewind = false; |
|
| 59 | - |
|
| 60 | - /** |
|
| 61 | - * Rewind to a spot to re-perform processing. This is useful if you |
|
| 62 | - * deleted a node, and now need to see if this change affected any |
|
| 63 | - * earlier nodes. Rewinding does not affect other injectors, and can |
|
| 64 | - * result in infinite loops if not used carefully. |
|
| 65 | - * @warning HTML Purifier will prevent you from fast-forwarding with this |
|
| 66 | - * function. |
|
| 67 | - */ |
|
| 68 | - public function rewind($index) { |
|
| 69 | - $this->rewind = $index; |
|
| 70 | - } |
|
| 71 | - |
|
| 72 | - /** |
|
| 73 | - * Retrieves rewind, and then unsets it. |
|
| 74 | - */ |
|
| 75 | - public function getRewind() { |
|
| 76 | - $r = $this->rewind; |
|
| 77 | - $this->rewind = false; |
|
| 78 | - return $r; |
|
| 79 | - } |
|
| 80 | - |
|
| 81 | - /** |
|
| 82 | - * Prepares the injector by giving it the config and context objects: |
|
| 83 | - * this allows references to important variables to be made within |
|
| 84 | - * the injector. This function also checks if the HTML environment |
|
| 85 | - * will work with the Injector (see checkNeeded()). |
|
| 86 | - * @param $config Instance of HTMLPurifier_Config |
|
| 87 | - * @param $context Instance of HTMLPurifier_Context |
|
| 88 | - * @return Boolean false if success, string of missing needed element/attribute if failure |
|
| 89 | - */ |
|
| 90 | - public function prepare($config, $context) { |
|
| 91 | - $this->htmlDefinition = $config->getHTMLDefinition(); |
|
| 92 | - // Even though this might fail, some unit tests ignore this and |
|
| 93 | - // still test checkNeeded, so be careful. Maybe get rid of that |
|
| 94 | - // dependency. |
|
| 95 | - $result = $this->checkNeeded($config); |
|
| 96 | - if ($result !== false) return $result; |
|
| 97 | - $this->currentNesting =& $context->get('CurrentNesting'); |
|
| 98 | - $this->inputTokens =& $context->get('InputTokens'); |
|
| 99 | - $this->inputIndex =& $context->get('InputIndex'); |
|
| 100 | - return false; |
|
| 101 | - } |
|
| 102 | - |
|
| 103 | - /** |
|
| 104 | - * This function checks if the HTML environment |
|
| 105 | - * will work with the Injector: if p tags are not allowed, the |
|
| 106 | - * Auto-Paragraphing injector should not be enabled. |
|
| 107 | - * @param $config Instance of HTMLPurifier_Config |
|
| 108 | - * @param $context Instance of HTMLPurifier_Context |
|
| 109 | - * @return Boolean false if success, string of missing needed element/attribute if failure |
|
| 110 | - */ |
|
| 111 | - public function checkNeeded($config) { |
|
| 112 | - $def = $config->getHTMLDefinition(); |
|
| 113 | - foreach ($this->needed as $element => $attributes) { |
|
| 114 | - if (is_int($element)) $element = $attributes; |
|
| 115 | - if (!isset($def->info[$element])) return $element; |
|
| 116 | - if (!is_array($attributes)) continue; |
|
| 117 | - foreach ($attributes as $name) { |
|
| 118 | - if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; |
|
| 119 | - } |
|
| 120 | - } |
|
| 121 | - return false; |
|
| 122 | - } |
|
| 123 | - |
|
| 124 | - /** |
|
| 125 | - * Tests if the context node allows a certain element |
|
| 126 | - * @param $name Name of element to test for |
|
| 127 | - * @return True if element is allowed, false if it is not |
|
| 128 | - */ |
|
| 129 | - public function allowsElement($name) { |
|
| 130 | - if (!empty($this->currentNesting)) { |
|
| 131 | - $parent_token = array_pop($this->currentNesting); |
|
| 132 | - $this->currentNesting[] = $parent_token; |
|
| 133 | - $parent = $this->htmlDefinition->info[$parent_token->name]; |
|
| 134 | - } else { |
|
| 135 | - $parent = $this->htmlDefinition->info_parent_def; |
|
| 136 | - } |
|
| 137 | - if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { |
|
| 138 | - return false; |
|
| 139 | - } |
|
| 140 | - // check for exclusion |
|
| 141 | - for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { |
|
| 142 | - $node = $this->currentNesting[$i]; |
|
| 143 | - $def = $this->htmlDefinition->info[$node->name]; |
|
| 144 | - if (isset($def->excludes[$name])) return false; |
|
| 145 | - } |
|
| 146 | - return true; |
|
| 147 | - } |
|
| 148 | - |
|
| 149 | - /** |
|
| 150 | - * Iterator function, which starts with the next token and continues until |
|
| 151 | - * you reach the end of the input tokens. |
|
| 152 | - * @warning Please prevent previous references from interfering with this |
|
| 153 | - * functions by setting $i = null beforehand! |
|
| 154 | - * @param &$i Current integer index variable for inputTokens |
|
| 155 | - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
| 156 | - */ |
|
| 157 | - protected function forward(&$i, &$current) { |
|
| 158 | - if ($i === null) $i = $this->inputIndex + 1; |
|
| 159 | - else $i++; |
|
| 160 | - if (!isset($this->inputTokens[$i])) return false; |
|
| 161 | - $current = $this->inputTokens[$i]; |
|
| 162 | - return true; |
|
| 163 | - } |
|
| 164 | - |
|
| 165 | - /** |
|
| 166 | - * Similar to _forward, but accepts a third parameter $nesting (which |
|
| 167 | - * should be initialized at 0) and stops when we hit the end tag |
|
| 168 | - * for the node $this->inputIndex starts in. |
|
| 169 | - */ |
|
| 170 | - protected function forwardUntilEndToken(&$i, &$current, &$nesting) { |
|
| 171 | - $result = $this->forward($i, $current); |
|
| 172 | - if (!$result) return false; |
|
| 173 | - if ($nesting === null) $nesting = 0; |
|
| 174 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
| 175 | - elseif ($current instanceof HTMLPurifier_Token_End) { |
|
| 176 | - if ($nesting <= 0) return false; |
|
| 177 | - $nesting--; |
|
| 178 | - } |
|
| 179 | - return true; |
|
| 180 | - } |
|
| 181 | - |
|
| 182 | - /** |
|
| 183 | - * Iterator function, starts with the previous token and continues until |
|
| 184 | - * you reach the beginning of input tokens. |
|
| 185 | - * @warning Please prevent previous references from interfering with this |
|
| 186 | - * functions by setting $i = null beforehand! |
|
| 187 | - * @param &$i Current integer index variable for inputTokens |
|
| 188 | - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
| 189 | - */ |
|
| 190 | - protected function backward(&$i, &$current) { |
|
| 191 | - if ($i === null) $i = $this->inputIndex - 1; |
|
| 192 | - else $i--; |
|
| 193 | - if ($i < 0) return false; |
|
| 194 | - $current = $this->inputTokens[$i]; |
|
| 195 | - return true; |
|
| 196 | - } |
|
| 197 | - |
|
| 198 | - /** |
|
| 199 | - * Initializes the iterator at the current position. Use in a do {} while; |
|
| 200 | - * loop to force the _forward and _backward functions to start at the |
|
| 201 | - * current location. |
|
| 202 | - * @warning Please prevent previous references from interfering with this |
|
| 203 | - * functions by setting $i = null beforehand! |
|
| 204 | - * @param &$i Current integer index variable for inputTokens |
|
| 205 | - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
| 206 | - */ |
|
| 207 | - protected function current(&$i, &$current) { |
|
| 208 | - if ($i === null) $i = $this->inputIndex; |
|
| 209 | - $current = $this->inputTokens[$i]; |
|
| 210 | - } |
|
| 211 | - |
|
| 212 | - /** |
|
| 213 | - * Handler that is called when a text token is processed |
|
| 214 | - */ |
|
| 215 | - public function handleText(&$token) {} |
|
| 216 | - |
|
| 217 | - /** |
|
| 218 | - * Handler that is called when a start or empty token is processed |
|
| 219 | - */ |
|
| 220 | - public function handleElement(&$token) {} |
|
| 221 | - |
|
| 222 | - /** |
|
| 223 | - * Handler that is called when an end token is processed |
|
| 224 | - */ |
|
| 225 | - public function handleEnd(&$token) { |
|
| 226 | - $this->notifyEnd($token); |
|
| 227 | - } |
|
| 228 | - |
|
| 229 | - /** |
|
| 230 | - * Notifier that is called when an end token is processed |
|
| 231 | - * @note This differs from handlers in that the token is read-only |
|
| 232 | - * @deprecated |
|
| 233 | - */ |
|
| 234 | - public function notifyEnd($token) {} |
|
| 19 | + /** |
|
| 20 | + * Advisory name of injector, this is for friendly error messages |
|
| 21 | + */ |
|
| 22 | + public $name; |
|
| 23 | + |
|
| 24 | + /** |
|
| 25 | + * Instance of HTMLPurifier_HTMLDefinition |
|
| 26 | + */ |
|
| 27 | + protected $htmlDefinition; |
|
| 28 | + |
|
| 29 | + /** |
|
| 30 | + * Reference to CurrentNesting variable in Context. This is an array |
|
| 31 | + * list of tokens that we are currently "inside" |
|
| 32 | + */ |
|
| 33 | + protected $currentNesting; |
|
| 34 | + |
|
| 35 | + /** |
|
| 36 | + * Reference to InputTokens variable in Context. This is an array |
|
| 37 | + * list of the input tokens that are being processed. |
|
| 38 | + */ |
|
| 39 | + protected $inputTokens; |
|
| 40 | + |
|
| 41 | + /** |
|
| 42 | + * Reference to InputIndex variable in Context. This is an integer |
|
| 43 | + * array index for $this->inputTokens that indicates what token |
|
| 44 | + * is currently being processed. |
|
| 45 | + */ |
|
| 46 | + protected $inputIndex; |
|
| 47 | + |
|
| 48 | + /** |
|
| 49 | + * Array of elements and attributes this injector creates and therefore |
|
| 50 | + * need to be allowed by the definition. Takes form of |
|
| 51 | + * array('element' => array('attr', 'attr2'), 'element2') |
|
| 52 | + */ |
|
| 53 | + public $needed = array(); |
|
| 54 | + |
|
| 55 | + /** |
|
| 56 | + * Index of inputTokens to rewind to. |
|
| 57 | + */ |
|
| 58 | + protected $rewind = false; |
|
| 59 | + |
|
| 60 | + /** |
|
| 61 | + * Rewind to a spot to re-perform processing. This is useful if you |
|
| 62 | + * deleted a node, and now need to see if this change affected any |
|
| 63 | + * earlier nodes. Rewinding does not affect other injectors, and can |
|
| 64 | + * result in infinite loops if not used carefully. |
|
| 65 | + * @warning HTML Purifier will prevent you from fast-forwarding with this |
|
| 66 | + * function. |
|
| 67 | + */ |
|
| 68 | + public function rewind($index) { |
|
| 69 | + $this->rewind = $index; |
|
| 70 | + } |
|
| 71 | + |
|
| 72 | + /** |
|
| 73 | + * Retrieves rewind, and then unsets it. |
|
| 74 | + */ |
|
| 75 | + public function getRewind() { |
|
| 76 | + $r = $this->rewind; |
|
| 77 | + $this->rewind = false; |
|
| 78 | + return $r; |
|
| 79 | + } |
|
| 80 | + |
|
| 81 | + /** |
|
| 82 | + * Prepares the injector by giving it the config and context objects: |
|
| 83 | + * this allows references to important variables to be made within |
|
| 84 | + * the injector. This function also checks if the HTML environment |
|
| 85 | + * will work with the Injector (see checkNeeded()). |
|
| 86 | + * @param $config Instance of HTMLPurifier_Config |
|
| 87 | + * @param $context Instance of HTMLPurifier_Context |
|
| 88 | + * @return Boolean false if success, string of missing needed element/attribute if failure |
|
| 89 | + */ |
|
| 90 | + public function prepare($config, $context) { |
|
| 91 | + $this->htmlDefinition = $config->getHTMLDefinition(); |
|
| 92 | + // Even though this might fail, some unit tests ignore this and |
|
| 93 | + // still test checkNeeded, so be careful. Maybe get rid of that |
|
| 94 | + // dependency. |
|
| 95 | + $result = $this->checkNeeded($config); |
|
| 96 | + if ($result !== false) return $result; |
|
| 97 | + $this->currentNesting =& $context->get('CurrentNesting'); |
|
| 98 | + $this->inputTokens =& $context->get('InputTokens'); |
|
| 99 | + $this->inputIndex =& $context->get('InputIndex'); |
|
| 100 | + return false; |
|
| 101 | + } |
|
| 102 | + |
|
| 103 | + /** |
|
| 104 | + * This function checks if the HTML environment |
|
| 105 | + * will work with the Injector: if p tags are not allowed, the |
|
| 106 | + * Auto-Paragraphing injector should not be enabled. |
|
| 107 | + * @param $config Instance of HTMLPurifier_Config |
|
| 108 | + * @param $context Instance of HTMLPurifier_Context |
|
| 109 | + * @return Boolean false if success, string of missing needed element/attribute if failure |
|
| 110 | + */ |
|
| 111 | + public function checkNeeded($config) { |
|
| 112 | + $def = $config->getHTMLDefinition(); |
|
| 113 | + foreach ($this->needed as $element => $attributes) { |
|
| 114 | + if (is_int($element)) $element = $attributes; |
|
| 115 | + if (!isset($def->info[$element])) return $element; |
|
| 116 | + if (!is_array($attributes)) continue; |
|
| 117 | + foreach ($attributes as $name) { |
|
| 118 | + if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; |
|
| 119 | + } |
|
| 120 | + } |
|
| 121 | + return false; |
|
| 122 | + } |
|
| 123 | + |
|
| 124 | + /** |
|
| 125 | + * Tests if the context node allows a certain element |
|
| 126 | + * @param $name Name of element to test for |
|
| 127 | + * @return True if element is allowed, false if it is not |
|
| 128 | + */ |
|
| 129 | + public function allowsElement($name) { |
|
| 130 | + if (!empty($this->currentNesting)) { |
|
| 131 | + $parent_token = array_pop($this->currentNesting); |
|
| 132 | + $this->currentNesting[] = $parent_token; |
|
| 133 | + $parent = $this->htmlDefinition->info[$parent_token->name]; |
|
| 134 | + } else { |
|
| 135 | + $parent = $this->htmlDefinition->info_parent_def; |
|
| 136 | + } |
|
| 137 | + if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { |
|
| 138 | + return false; |
|
| 139 | + } |
|
| 140 | + // check for exclusion |
|
| 141 | + for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { |
|
| 142 | + $node = $this->currentNesting[$i]; |
|
| 143 | + $def = $this->htmlDefinition->info[$node->name]; |
|
| 144 | + if (isset($def->excludes[$name])) return false; |
|
| 145 | + } |
|
| 146 | + return true; |
|
| 147 | + } |
|
| 148 | + |
|
| 149 | + /** |
|
| 150 | + * Iterator function, which starts with the next token and continues until |
|
| 151 | + * you reach the end of the input tokens. |
|
| 152 | + * @warning Please prevent previous references from interfering with this |
|
| 153 | + * functions by setting $i = null beforehand! |
|
| 154 | + * @param &$i Current integer index variable for inputTokens |
|
| 155 | + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
| 156 | + */ |
|
| 157 | + protected function forward(&$i, &$current) { |
|
| 158 | + if ($i === null) $i = $this->inputIndex + 1; |
|
| 159 | + else $i++; |
|
| 160 | + if (!isset($this->inputTokens[$i])) return false; |
|
| 161 | + $current = $this->inputTokens[$i]; |
|
| 162 | + return true; |
|
| 163 | + } |
|
| 164 | + |
|
| 165 | + /** |
|
| 166 | + * Similar to _forward, but accepts a third parameter $nesting (which |
|
| 167 | + * should be initialized at 0) and stops when we hit the end tag |
|
| 168 | + * for the node $this->inputIndex starts in. |
|
| 169 | + */ |
|
| 170 | + protected function forwardUntilEndToken(&$i, &$current, &$nesting) { |
|
| 171 | + $result = $this->forward($i, $current); |
|
| 172 | + if (!$result) return false; |
|
| 173 | + if ($nesting === null) $nesting = 0; |
|
| 174 | + if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
| 175 | + elseif ($current instanceof HTMLPurifier_Token_End) { |
|
| 176 | + if ($nesting <= 0) return false; |
|
| 177 | + $nesting--; |
|
| 178 | + } |
|
| 179 | + return true; |
|
| 180 | + } |
|
| 181 | + |
|
| 182 | + /** |
|
| 183 | + * Iterator function, starts with the previous token and continues until |
|
| 184 | + * you reach the beginning of input tokens. |
|
| 185 | + * @warning Please prevent previous references from interfering with this |
|
| 186 | + * functions by setting $i = null beforehand! |
|
| 187 | + * @param &$i Current integer index variable for inputTokens |
|
| 188 | + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
| 189 | + */ |
|
| 190 | + protected function backward(&$i, &$current) { |
|
| 191 | + if ($i === null) $i = $this->inputIndex - 1; |
|
| 192 | + else $i--; |
|
| 193 | + if ($i < 0) return false; |
|
| 194 | + $current = $this->inputTokens[$i]; |
|
| 195 | + return true; |
|
| 196 | + } |
|
| 197 | + |
|
| 198 | + /** |
|
| 199 | + * Initializes the iterator at the current position. Use in a do {} while; |
|
| 200 | + * loop to force the _forward and _backward functions to start at the |
|
| 201 | + * current location. |
|
| 202 | + * @warning Please prevent previous references from interfering with this |
|
| 203 | + * functions by setting $i = null beforehand! |
|
| 204 | + * @param &$i Current integer index variable for inputTokens |
|
| 205 | + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
| 206 | + */ |
|
| 207 | + protected function current(&$i, &$current) { |
|
| 208 | + if ($i === null) $i = $this->inputIndex; |
|
| 209 | + $current = $this->inputTokens[$i]; |
|
| 210 | + } |
|
| 211 | + |
|
| 212 | + /** |
|
| 213 | + * Handler that is called when a text token is processed |
|
| 214 | + */ |
|
| 215 | + public function handleText(&$token) {} |
|
| 216 | + |
|
| 217 | + /** |
|
| 218 | + * Handler that is called when a start or empty token is processed |
|
| 219 | + */ |
|
| 220 | + public function handleElement(&$token) {} |
|
| 221 | + |
|
| 222 | + /** |
|
| 223 | + * Handler that is called when an end token is processed |
|
| 224 | + */ |
|
| 225 | + public function handleEnd(&$token) { |
|
| 226 | + $this->notifyEnd($token); |
|
| 227 | + } |
|
| 228 | + |
|
| 229 | + /** |
|
| 230 | + * Notifier that is called when an end token is processed |
|
| 231 | + * @note This differs from handlers in that the token is read-only |
|
| 232 | + * @deprecated |
|
| 233 | + */ |
|
| 234 | + public function notifyEnd($token) {} |
|
| 235 | 235 | |
| 236 | 236 | |
| 237 | 237 | } |
@@ -94,9 +94,9 @@ discard block |
||
| 94 | 94 | // dependency. |
| 95 | 95 | $result = $this->checkNeeded($config); |
| 96 | 96 | if ($result !== false) return $result; |
| 97 | - $this->currentNesting =& $context->get('CurrentNesting'); |
|
| 98 | - $this->inputTokens =& $context->get('InputTokens'); |
|
| 99 | - $this->inputIndex =& $context->get('InputIndex'); |
|
| 97 | + $this->currentNesting = & $context->get('CurrentNesting'); |
|
| 98 | + $this->inputTokens = & $context->get('InputTokens'); |
|
| 99 | + $this->inputIndex = & $context->get('InputIndex'); |
|
| 100 | 100 | return false; |
| 101 | 101 | } |
| 102 | 102 | |
@@ -171,7 +171,7 @@ discard block |
||
| 171 | 171 | $result = $this->forward($i, $current); |
| 172 | 172 | if (!$result) return false; |
| 173 | 173 | if ($nesting === null) $nesting = 0; |
| 174 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
| 174 | + if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
| 175 | 175 | elseif ($current instanceof HTMLPurifier_Token_End) { |
| 176 | 176 | if ($nesting <= 0) return false; |
| 177 | 177 | $nesting--; |
@@ -93,7 +93,9 @@ discard block |
||
| 93 | 93 | // still test checkNeeded, so be careful. Maybe get rid of that |
| 94 | 94 | // dependency. |
| 95 | 95 | $result = $this->checkNeeded($config); |
| 96 | - if ($result !== false) return $result; |
|
| 96 | + if ($result !== false) { |
|
| 97 | + return $result; |
|
| 98 | + } |
|
| 97 | 99 | $this->currentNesting =& $context->get('CurrentNesting'); |
| 98 | 100 | $this->inputTokens =& $context->get('InputTokens'); |
| 99 | 101 | $this->inputIndex =& $context->get('InputIndex'); |
@@ -111,11 +113,19 @@ discard block |
||
| 111 | 113 | public function checkNeeded($config) { |
| 112 | 114 | $def = $config->getHTMLDefinition(); |
| 113 | 115 | foreach ($this->needed as $element => $attributes) { |
| 114 | - if (is_int($element)) $element = $attributes; |
|
| 115 | - if (!isset($def->info[$element])) return $element; |
|
| 116 | - if (!is_array($attributes)) continue; |
|
| 116 | + if (is_int($element)) { |
|
| 117 | + $element = $attributes; |
|
| 118 | + } |
|
| 119 | + if (!isset($def->info[$element])) { |
|
| 120 | + return $element; |
|
| 121 | + } |
|
| 122 | + if (!is_array($attributes)) { |
|
| 123 | + continue; |
|
| 124 | + } |
|
| 117 | 125 | foreach ($attributes as $name) { |
| 118 | - if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; |
|
| 126 | + if (!isset($def->info[$element]->attr[$name])) { |
|
| 127 | + return "$element.$name"; |
|
| 128 | + } |
|
| 119 | 129 | } |
| 120 | 130 | } |
| 121 | 131 | return false; |
@@ -141,7 +151,9 @@ discard block |
||
| 141 | 151 | for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { |
| 142 | 152 | $node = $this->currentNesting[$i]; |
| 143 | 153 | $def = $this->htmlDefinition->info[$node->name]; |
| 144 | - if (isset($def->excludes[$name])) return false; |
|
| 154 | + if (isset($def->excludes[$name])) { |
|
| 155 | + return false; |
|
| 156 | + } |
|
| 145 | 157 | } |
| 146 | 158 | return true; |
| 147 | 159 | } |
@@ -155,9 +167,14 @@ discard block |
||
| 155 | 167 | * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
| 156 | 168 | */ |
| 157 | 169 | protected function forward(&$i, &$current) { |
| 158 | - if ($i === null) $i = $this->inputIndex + 1; |
|
| 159 | - else $i++; |
|
| 160 | - if (!isset($this->inputTokens[$i])) return false; |
|
| 170 | + if ($i === null) { |
|
| 171 | + $i = $this->inputIndex + 1; |
|
| 172 | + } else { |
|
| 173 | + $i++; |
|
| 174 | + } |
|
| 175 | + if (!isset($this->inputTokens[$i])) { |
|
| 176 | + return false; |
|
| 177 | + } |
|
| 161 | 178 | $current = $this->inputTokens[$i]; |
| 162 | 179 | return true; |
| 163 | 180 | } |
@@ -169,11 +186,18 @@ discard block |
||
| 169 | 186 | */ |
| 170 | 187 | protected function forwardUntilEndToken(&$i, &$current, &$nesting) { |
| 171 | 188 | $result = $this->forward($i, $current); |
| 172 | - if (!$result) return false; |
|
| 173 | - if ($nesting === null) $nesting = 0; |
|
| 174 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
| 175 | - elseif ($current instanceof HTMLPurifier_Token_End) { |
|
| 176 | - if ($nesting <= 0) return false; |
|
| 189 | + if (!$result) { |
|
| 190 | + return false; |
|
| 191 | + } |
|
| 192 | + if ($nesting === null) { |
|
| 193 | + $nesting = 0; |
|
| 194 | + } |
|
| 195 | + if ($current instanceof HTMLPurifier_Token_Start) { |
|
| 196 | + $nesting++; |
|
| 197 | + } elseif ($current instanceof HTMLPurifier_Token_End) { |
|
| 198 | + if ($nesting <= 0) { |
|
| 199 | + return false; |
|
| 200 | + } |
|
| 177 | 201 | $nesting--; |
| 178 | 202 | } |
| 179 | 203 | return true; |
@@ -188,9 +212,14 @@ discard block |
||
| 188 | 212 | * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
| 189 | 213 | */ |
| 190 | 214 | protected function backward(&$i, &$current) { |
| 191 | - if ($i === null) $i = $this->inputIndex - 1; |
|
| 192 | - else $i--; |
|
| 193 | - if ($i < 0) return false; |
|
| 215 | + if ($i === null) { |
|
| 216 | + $i = $this->inputIndex - 1; |
|
| 217 | + } else { |
|
| 218 | + $i--; |
|
| 219 | + } |
|
| 220 | + if ($i < 0) { |
|
| 221 | + return false; |
|
| 222 | + } |
|
| 194 | 223 | $current = $this->inputTokens[$i]; |
| 195 | 224 | return true; |
| 196 | 225 | } |
@@ -205,7 +234,9 @@ discard block |
||
| 205 | 234 | * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
| 206 | 235 | */ |
| 207 | 236 | protected function current(&$i, &$current) { |
| 208 | - if ($i === null) $i = $this->inputIndex; |
|
| 237 | + if ($i === null) { |
|
| 238 | + $i = $this->inputIndex; |
|
| 239 | + } |
|
| 209 | 240 | $current = $this->inputTokens[$i]; |
| 210 | 241 | } |
| 211 | 242 | |
@@ -66,7 +66,7 @@ |
||
| 66 | 66 | |
| 67 | 67 | /** |
| 68 | 68 | * Retrieves a localised message. |
| 69 | - * @param $key string identifier of message |
|
| 69 | + * @param string $key string identifier of message |
|
| 70 | 70 | * @return string localised message |
| 71 | 71 | */ |
| 72 | 72 | public function getMessage($key) { |
@@ -7,156 +7,156 @@ |
||
| 7 | 7 | class HTMLPurifier_Language |
| 8 | 8 | { |
| 9 | 9 | |
| 10 | - /** |
|
| 11 | - * ISO 639 language code of language. Prefers shortest possible version |
|
| 12 | - */ |
|
| 13 | - public $code = 'en'; |
|
| 14 | - |
|
| 15 | - /** |
|
| 16 | - * Fallback language code |
|
| 17 | - */ |
|
| 18 | - public $fallback = false; |
|
| 19 | - |
|
| 20 | - /** |
|
| 21 | - * Array of localizable messages |
|
| 22 | - */ |
|
| 23 | - public $messages = array(); |
|
| 24 | - |
|
| 25 | - /** |
|
| 26 | - * Array of localizable error codes |
|
| 27 | - */ |
|
| 28 | - public $errorNames = array(); |
|
| 29 | - |
|
| 30 | - /** |
|
| 31 | - * True if no message file was found for this language, so English |
|
| 32 | - * is being used instead. Check this if you'd like to notify the |
|
| 33 | - * user that they've used a non-supported language. |
|
| 34 | - */ |
|
| 35 | - public $error = false; |
|
| 36 | - |
|
| 37 | - /** |
|
| 38 | - * Has the language object been loaded yet? |
|
| 39 | - * @todo Make it private, fix usage in HTMLPurifier_LanguageTest |
|
| 40 | - */ |
|
| 41 | - public $_loaded = false; |
|
| 42 | - |
|
| 43 | - /** |
|
| 44 | - * Instances of HTMLPurifier_Config and HTMLPurifier_Context |
|
| 45 | - */ |
|
| 46 | - protected $config, $context; |
|
| 47 | - |
|
| 48 | - public function __construct($config, $context) { |
|
| 49 | - $this->config = $config; |
|
| 50 | - $this->context = $context; |
|
| 51 | - } |
|
| 52 | - |
|
| 53 | - /** |
|
| 54 | - * Loads language object with necessary info from factory cache |
|
| 55 | - * @note This is a lazy loader |
|
| 56 | - */ |
|
| 57 | - public function load() { |
|
| 58 | - if ($this->_loaded) return; |
|
| 59 | - $factory = HTMLPurifier_LanguageFactory::instance(); |
|
| 60 | - $factory->loadLanguage($this->code); |
|
| 61 | - foreach ($factory->keys as $key) { |
|
| 62 | - $this->$key = $factory->cache[$this->code][$key]; |
|
| 63 | - } |
|
| 64 | - $this->_loaded = true; |
|
| 65 | - } |
|
| 66 | - |
|
| 67 | - /** |
|
| 68 | - * Retrieves a localised message. |
|
| 69 | - * @param $key string identifier of message |
|
| 70 | - * @return string localised message |
|
| 71 | - */ |
|
| 72 | - public function getMessage($key) { |
|
| 73 | - if (!$this->_loaded) $this->load(); |
|
| 74 | - if (!isset($this->messages[$key])) return "[$key]"; |
|
| 75 | - return $this->messages[$key]; |
|
| 76 | - } |
|
| 77 | - |
|
| 78 | - /** |
|
| 79 | - * Retrieves a localised error name. |
|
| 80 | - * @param $int integer error number, corresponding to PHP's error |
|
| 81 | - * reporting |
|
| 82 | - * @return string localised message |
|
| 83 | - */ |
|
| 84 | - public function getErrorName($int) { |
|
| 85 | - if (!$this->_loaded) $this->load(); |
|
| 86 | - if (!isset($this->errorNames[$int])) return "[Error: $int]"; |
|
| 87 | - return $this->errorNames[$int]; |
|
| 88 | - } |
|
| 89 | - |
|
| 90 | - /** |
|
| 91 | - * Converts an array list into a string readable representation |
|
| 92 | - */ |
|
| 93 | - public function listify($array) { |
|
| 94 | - $sep = $this->getMessage('Item separator'); |
|
| 95 | - $sep_last = $this->getMessage('Item separator last'); |
|
| 96 | - $ret = ''; |
|
| 97 | - for ($i = 0, $c = count($array); $i < $c; $i++) { |
|
| 98 | - if ($i == 0) { |
|
| 99 | - } elseif ($i + 1 < $c) { |
|
| 100 | - $ret .= $sep; |
|
| 101 | - } else { |
|
| 102 | - $ret .= $sep_last; |
|
| 103 | - } |
|
| 104 | - $ret .= $array[$i]; |
|
| 105 | - } |
|
| 106 | - return $ret; |
|
| 107 | - } |
|
| 108 | - |
|
| 109 | - /** |
|
| 110 | - * Formats a localised message with passed parameters |
|
| 111 | - * @param $key string identifier of message |
|
| 112 | - * @param $args Parameters to substitute in |
|
| 113 | - * @return string localised message |
|
| 114 | - * @todo Implement conditionals? Right now, some messages make |
|
| 115 | - * reference to line numbers, but those aren't always available |
|
| 116 | - */ |
|
| 117 | - public function formatMessage($key, $args = array()) { |
|
| 118 | - if (!$this->_loaded) $this->load(); |
|
| 119 | - if (!isset($this->messages[$key])) return "[$key]"; |
|
| 120 | - $raw = $this->messages[$key]; |
|
| 121 | - $subst = array(); |
|
| 122 | - $generator = false; |
|
| 123 | - foreach ($args as $i => $value) { |
|
| 124 | - if (is_object($value)) { |
|
| 125 | - if ($value instanceof HTMLPurifier_Token) { |
|
| 126 | - // factor this out some time |
|
| 127 | - if (!$generator) $generator = $this->context->get('Generator'); |
|
| 128 | - if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name; |
|
| 129 | - if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data; |
|
| 130 | - $subst['$'.$i.'.Compact'] = |
|
| 131 | - $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value); |
|
| 132 | - // a more complex algorithm for compact representation |
|
| 133 | - // could be introduced for all types of tokens. This |
|
| 134 | - // may need to be factored out into a dedicated class |
|
| 135 | - if (!empty($value->attr)) { |
|
| 136 | - $stripped_token = clone $value; |
|
| 137 | - $stripped_token->attr = array(); |
|
| 138 | - $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token); |
|
| 139 | - } |
|
| 140 | - $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown'; |
|
| 141 | - } |
|
| 142 | - continue; |
|
| 143 | - } elseif (is_array($value)) { |
|
| 144 | - $keys = array_keys($value); |
|
| 145 | - if (array_keys($keys) === $keys) { |
|
| 146 | - // list |
|
| 147 | - $subst['$'.$i] = $this->listify($value); |
|
| 148 | - } else { |
|
| 149 | - // associative array |
|
| 150 | - // no $i implementation yet, sorry |
|
| 151 | - $subst['$'.$i.'.Keys'] = $this->listify($keys); |
|
| 152 | - $subst['$'.$i.'.Values'] = $this->listify(array_values($value)); |
|
| 153 | - } |
|
| 154 | - continue; |
|
| 155 | - } |
|
| 156 | - $subst['$' . $i] = $value; |
|
| 157 | - } |
|
| 158 | - return strtr($raw, $subst); |
|
| 159 | - } |
|
| 10 | + /** |
|
| 11 | + * ISO 639 language code of language. Prefers shortest possible version |
|
| 12 | + */ |
|
| 13 | + public $code = 'en'; |
|
| 14 | + |
|
| 15 | + /** |
|
| 16 | + * Fallback language code |
|
| 17 | + */ |
|
| 18 | + public $fallback = false; |
|
| 19 | + |
|
| 20 | + /** |
|
| 21 | + * Array of localizable messages |
|
| 22 | + */ |
|
| 23 | + public $messages = array(); |
|
| 24 | + |
|
| 25 | + /** |
|
| 26 | + * Array of localizable error codes |
|
| 27 | + */ |
|
| 28 | + public $errorNames = array(); |
|
| 29 | + |
|
| 30 | + /** |
|
| 31 | + * True if no message file was found for this language, so English |
|
| 32 | + * is being used instead. Check this if you'd like to notify the |
|
| 33 | + * user that they've used a non-supported language. |
|
| 34 | + */ |
|
| 35 | + public $error = false; |
|
| 36 | + |
|
| 37 | + /** |
|
| 38 | + * Has the language object been loaded yet? |
|
| 39 | + * @todo Make it private, fix usage in HTMLPurifier_LanguageTest |
|
| 40 | + */ |
|
| 41 | + public $_loaded = false; |
|
| 42 | + |
|
| 43 | + /** |
|
| 44 | + * Instances of HTMLPurifier_Config and HTMLPurifier_Context |
|
| 45 | + */ |
|
| 46 | + protected $config, $context; |
|
| 47 | + |
|
| 48 | + public function __construct($config, $context) { |
|
| 49 | + $this->config = $config; |
|
| 50 | + $this->context = $context; |
|
| 51 | + } |
|
| 52 | + |
|
| 53 | + /** |
|
| 54 | + * Loads language object with necessary info from factory cache |
|
| 55 | + * @note This is a lazy loader |
|
| 56 | + */ |
|
| 57 | + public function load() { |
|
| 58 | + if ($this->_loaded) return; |
|
| 59 | + $factory = HTMLPurifier_LanguageFactory::instance(); |
|
| 60 | + $factory->loadLanguage($this->code); |
|
| 61 | + foreach ($factory->keys as $key) { |
|
| 62 | + $this->$key = $factory->cache[$this->code][$key]; |
|
| 63 | + } |
|
| 64 | + $this->_loaded = true; |
|
| 65 | + } |
|
| 66 | + |
|
| 67 | + /** |
|
| 68 | + * Retrieves a localised message. |
|
| 69 | + * @param $key string identifier of message |
|
| 70 | + * @return string localised message |
|
| 71 | + */ |
|
| 72 | + public function getMessage($key) { |
|
| 73 | + if (!$this->_loaded) $this->load(); |
|
| 74 | + if (!isset($this->messages[$key])) return "[$key]"; |
|
| 75 | + return $this->messages[$key]; |
|
| 76 | + } |
|
| 77 | + |
|
| 78 | + /** |
|
| 79 | + * Retrieves a localised error name. |
|
| 80 | + * @param $int integer error number, corresponding to PHP's error |
|
| 81 | + * reporting |
|
| 82 | + * @return string localised message |
|
| 83 | + */ |
|
| 84 | + public function getErrorName($int) { |
|
| 85 | + if (!$this->_loaded) $this->load(); |
|
| 86 | + if (!isset($this->errorNames[$int])) return "[Error: $int]"; |
|
| 87 | + return $this->errorNames[$int]; |
|
| 88 | + } |
|
| 89 | + |
|
| 90 | + /** |
|
| 91 | + * Converts an array list into a string readable representation |
|
| 92 | + */ |
|
| 93 | + public function listify($array) { |
|
| 94 | + $sep = $this->getMessage('Item separator'); |
|
| 95 | + $sep_last = $this->getMessage('Item separator last'); |
|
| 96 | + $ret = ''; |
|
| 97 | + for ($i = 0, $c = count($array); $i < $c; $i++) { |
|
| 98 | + if ($i == 0) { |
|
| 99 | + } elseif ($i + 1 < $c) { |
|
| 100 | + $ret .= $sep; |
|
| 101 | + } else { |
|
| 102 | + $ret .= $sep_last; |
|
| 103 | + } |
|
| 104 | + $ret .= $array[$i]; |
|
| 105 | + } |
|
| 106 | + return $ret; |
|
| 107 | + } |
|
| 108 | + |
|
| 109 | + /** |
|
| 110 | + * Formats a localised message with passed parameters |
|
| 111 | + * @param $key string identifier of message |
|
| 112 | + * @param $args Parameters to substitute in |
|
| 113 | + * @return string localised message |
|
| 114 | + * @todo Implement conditionals? Right now, some messages make |
|
| 115 | + * reference to line numbers, but those aren't always available |
|
| 116 | + */ |
|
| 117 | + public function formatMessage($key, $args = array()) { |
|
| 118 | + if (!$this->_loaded) $this->load(); |
|
| 119 | + if (!isset($this->messages[$key])) return "[$key]"; |
|
| 120 | + $raw = $this->messages[$key]; |
|
| 121 | + $subst = array(); |
|
| 122 | + $generator = false; |
|
| 123 | + foreach ($args as $i => $value) { |
|
| 124 | + if (is_object($value)) { |
|
| 125 | + if ($value instanceof HTMLPurifier_Token) { |
|
| 126 | + // factor this out some time |
|
| 127 | + if (!$generator) $generator = $this->context->get('Generator'); |
|
| 128 | + if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name; |
|
| 129 | + if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data; |
|
| 130 | + $subst['$'.$i.'.Compact'] = |
|
| 131 | + $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value); |
|
| 132 | + // a more complex algorithm for compact representation |
|
| 133 | + // could be introduced for all types of tokens. This |
|
| 134 | + // may need to be factored out into a dedicated class |
|
| 135 | + if (!empty($value->attr)) { |
|
| 136 | + $stripped_token = clone $value; |
|
| 137 | + $stripped_token->attr = array(); |
|
| 138 | + $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token); |
|
| 139 | + } |
|
| 140 | + $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown'; |
|
| 141 | + } |
|
| 142 | + continue; |
|
| 143 | + } elseif (is_array($value)) { |
|
| 144 | + $keys = array_keys($value); |
|
| 145 | + if (array_keys($keys) === $keys) { |
|
| 146 | + // list |
|
| 147 | + $subst['$'.$i] = $this->listify($value); |
|
| 148 | + } else { |
|
| 149 | + // associative array |
|
| 150 | + // no $i implementation yet, sorry |
|
| 151 | + $subst['$'.$i.'.Keys'] = $this->listify($keys); |
|
| 152 | + $subst['$'.$i.'.Values'] = $this->listify(array_values($value)); |
|
| 153 | + } |
|
| 154 | + continue; |
|
| 155 | + } |
|
| 156 | + $subst['$' . $i] = $value; |
|
| 157 | + } |
|
| 158 | + return strtr($raw, $subst); |
|
| 159 | + } |
|
| 160 | 160 | |
| 161 | 161 | } |
| 162 | 162 | |
@@ -55,7 +55,9 @@ discard block |
||
| 55 | 55 | * @note This is a lazy loader |
| 56 | 56 | */ |
| 57 | 57 | public function load() { |
| 58 | - if ($this->_loaded) return; |
|
| 58 | + if ($this->_loaded) { |
|
| 59 | + return; |
|
| 60 | + } |
|
| 59 | 61 | $factory = HTMLPurifier_LanguageFactory::instance(); |
| 60 | 62 | $factory->loadLanguage($this->code); |
| 61 | 63 | foreach ($factory->keys as $key) { |
@@ -70,8 +72,12 @@ discard block |
||
| 70 | 72 | * @return string localised message |
| 71 | 73 | */ |
| 72 | 74 | public function getMessage($key) { |
| 73 | - if (!$this->_loaded) $this->load(); |
|
| 74 | - if (!isset($this->messages[$key])) return "[$key]"; |
|
| 75 | + if (!$this->_loaded) { |
|
| 76 | + $this->load(); |
|
| 77 | + } |
|
| 78 | + if (!isset($this->messages[$key])) { |
|
| 79 | + return "[$key]"; |
|
| 80 | + } |
|
| 75 | 81 | return $this->messages[$key]; |
| 76 | 82 | } |
| 77 | 83 | |
@@ -82,8 +88,12 @@ discard block |
||
| 82 | 88 | * @return string localised message |
| 83 | 89 | */ |
| 84 | 90 | public function getErrorName($int) { |
| 85 | - if (!$this->_loaded) $this->load(); |
|
| 86 | - if (!isset($this->errorNames[$int])) return "[Error: $int]"; |
|
| 91 | + if (!$this->_loaded) { |
|
| 92 | + $this->load(); |
|
| 93 | + } |
|
| 94 | + if (!isset($this->errorNames[$int])) { |
|
| 95 | + return "[Error: $int]"; |
|
| 96 | + } |
|
| 87 | 97 | return $this->errorNames[$int]; |
| 88 | 98 | } |
| 89 | 99 | |
@@ -115,8 +125,12 @@ discard block |
||
| 115 | 125 | * reference to line numbers, but those aren't always available |
| 116 | 126 | */ |
| 117 | 127 | public function formatMessage($key, $args = array()) { |
| 118 | - if (!$this->_loaded) $this->load(); |
|
| 119 | - if (!isset($this->messages[$key])) return "[$key]"; |
|
| 128 | + if (!$this->_loaded) { |
|
| 129 | + $this->load(); |
|
| 130 | + } |
|
| 131 | + if (!isset($this->messages[$key])) { |
|
| 132 | + return "[$key]"; |
|
| 133 | + } |
|
| 120 | 134 | $raw = $this->messages[$key]; |
| 121 | 135 | $subst = array(); |
| 122 | 136 | $generator = false; |
@@ -124,9 +138,15 @@ discard block |
||
| 124 | 138 | if (is_object($value)) { |
| 125 | 139 | if ($value instanceof HTMLPurifier_Token) { |
| 126 | 140 | // factor this out some time |
| 127 | - if (!$generator) $generator = $this->context->get('Generator'); |
|
| 128 | - if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name; |
|
| 129 | - if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data; |
|
| 141 | + if (!$generator) { |
|
| 142 | + $generator = $this->context->get('Generator'); |
|
| 143 | + } |
|
| 144 | + if (isset($value->name)) { |
|
| 145 | + $subst['$'.$i.'.Name'] = $value->name; |
|
| 146 | + } |
|
| 147 | + if (isset($value->data)) { |
|
| 148 | + $subst['$'.$i.'.Data'] = $value->data; |
|
| 149 | + } |
|
| 130 | 150 | $subst['$'.$i.'.Compact'] = |
| 131 | 151 | $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value); |
| 132 | 152 | // a more complex algorithm for compact representation |
@@ -153,7 +153,7 @@ |
||
| 153 | 153 | } |
| 154 | 154 | continue; |
| 155 | 155 | } |
| 156 | - $subst['$' . $i] = $value; |
|
| 156 | + $subst['$'.$i] = $value; |
|
| 157 | 157 | } |
| 158 | 158 | return strtr($raw, $subst); |
| 159 | 159 | } |
@@ -319,6 +319,9 @@ discard block |
||
| 319 | 319 | |
| 320 | 320 | /** |
| 321 | 321 | * PHP 5.0.x compatible substr_count that implements offset and length |
| 322 | + * @param string $needle |
|
| 323 | + * @param integer $offset |
|
| 324 | + * @param integer $length |
|
| 322 | 325 | */ |
| 323 | 326 | protected function substrCount($haystack, $needle, $offset, $length) { |
| 324 | 327 | static $oldVersion; |
@@ -336,7 +339,7 @@ discard block |
||
| 336 | 339 | /** |
| 337 | 340 | * Takes the inside of an HTML tag and makes an assoc array of attributes. |
| 338 | 341 | * |
| 339 | - * @param $string Inside of tag excluding name. |
|
| 342 | + * @param string $string Inside of tag excluding name. |
|
| 340 | 343 | * @returns Assoc array of attributes. |
| 341 | 344 | */ |
| 342 | 345 | public function parseAttributeString($string, $config, $context) { |
@@ -13,477 +13,477 @@ |
||
| 13 | 13 | class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer |
| 14 | 14 | { |
| 15 | 15 | |
| 16 | - public $tracksLineNumbers = true; |
|
| 17 | - |
|
| 18 | - /** |
|
| 19 | - * Whitespace characters for str(c)spn. |
|
| 20 | - */ |
|
| 21 | - protected $_whitespace = "\x20\x09\x0D\x0A"; |
|
| 22 | - |
|
| 23 | - /** |
|
| 24 | - * Callback function for script CDATA fudge |
|
| 25 | - * @param $matches, in form of array(opening tag, contents, closing tag) |
|
| 26 | - */ |
|
| 27 | - protected function scriptCallback($matches) { |
|
| 28 | - return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3]; |
|
| 29 | - } |
|
| 30 | - |
|
| 31 | - public function tokenizeHTML($html, $config, $context) { |
|
| 32 | - |
|
| 33 | - // special normalization for script tags without any armor |
|
| 34 | - // our "armor" heurstic is a < sign any number of whitespaces after |
|
| 35 | - // the first script tag |
|
| 36 | - if ($config->get('HTML.Trusted')) { |
|
| 37 | - $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si', |
|
| 38 | - array($this, 'scriptCallback'), $html); |
|
| 39 | - } |
|
| 40 | - |
|
| 41 | - $html = $this->normalize($html, $config, $context); |
|
| 42 | - |
|
| 43 | - $cursor = 0; // our location in the text |
|
| 44 | - $inside_tag = false; // whether or not we're parsing the inside of a tag |
|
| 45 | - $array = array(); // result array |
|
| 46 | - |
|
| 47 | - // This is also treated to mean maintain *column* numbers too |
|
| 48 | - $maintain_line_numbers = $config->get('Core.MaintainLineNumbers'); |
|
| 49 | - |
|
| 50 | - if ($maintain_line_numbers === null) { |
|
| 51 | - // automatically determine line numbering by checking |
|
| 52 | - // if error collection is on |
|
| 53 | - $maintain_line_numbers = $config->get('Core.CollectErrors'); |
|
| 54 | - } |
|
| 55 | - |
|
| 56 | - if ($maintain_line_numbers) { |
|
| 57 | - $current_line = 1; |
|
| 58 | - $current_col = 0; |
|
| 59 | - $length = strlen($html); |
|
| 60 | - } else { |
|
| 61 | - $current_line = false; |
|
| 62 | - $current_col = false; |
|
| 63 | - $length = false; |
|
| 64 | - } |
|
| 65 | - $context->register('CurrentLine', $current_line); |
|
| 66 | - $context->register('CurrentCol', $current_col); |
|
| 67 | - $nl = "\n"; |
|
| 68 | - // how often to manually recalculate. This will ALWAYS be right, |
|
| 69 | - // but it's pretty wasteful. Set to 0 to turn off |
|
| 70 | - $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval'); |
|
| 71 | - |
|
| 72 | - $e = false; |
|
| 73 | - if ($config->get('Core.CollectErrors')) { |
|
| 74 | - $e =& $context->get('ErrorCollector'); |
|
| 75 | - } |
|
| 76 | - |
|
| 77 | - // for testing synchronization |
|
| 78 | - $loops = 0; |
|
| 79 | - |
|
| 80 | - while(++$loops) { |
|
| 81 | - |
|
| 82 | - // $cursor is either at the start of a token, or inside of |
|
| 83 | - // a tag (i.e. there was a < immediately before it), as indicated |
|
| 84 | - // by $inside_tag |
|
| 85 | - |
|
| 86 | - if ($maintain_line_numbers) { |
|
| 87 | - |
|
| 88 | - // $rcursor, however, is always at the start of a token. |
|
| 89 | - $rcursor = $cursor - (int) $inside_tag; |
|
| 90 | - |
|
| 91 | - // Column number is cheap, so we calculate it every round. |
|
| 92 | - // We're interested at the *end* of the newline string, so |
|
| 93 | - // we need to add strlen($nl) == 1 to $nl_pos before subtracting it |
|
| 94 | - // from our "rcursor" position. |
|
| 95 | - $nl_pos = strrpos($html, $nl, $rcursor - $length); |
|
| 96 | - $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); |
|
| 97 | - |
|
| 98 | - // recalculate lines |
|
| 99 | - if ( |
|
| 100 | - $synchronize_interval && // synchronization is on |
|
| 101 | - $cursor > 0 && // cursor is further than zero |
|
| 102 | - $loops % $synchronize_interval === 0 // time to synchronize! |
|
| 103 | - ) { |
|
| 104 | - $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); |
|
| 105 | - } |
|
| 106 | - |
|
| 107 | - } |
|
| 108 | - |
|
| 109 | - $position_next_lt = strpos($html, '<', $cursor); |
|
| 110 | - $position_next_gt = strpos($html, '>', $cursor); |
|
| 111 | - |
|
| 112 | - // triggers on "<b>asdf</b>" but not "asdf <b></b>" |
|
| 113 | - // special case to set up context |
|
| 114 | - if ($position_next_lt === $cursor) { |
|
| 115 | - $inside_tag = true; |
|
| 116 | - $cursor++; |
|
| 117 | - } |
|
| 118 | - |
|
| 119 | - if (!$inside_tag && $position_next_lt !== false) { |
|
| 120 | - // We are not inside tag and there still is another tag to parse |
|
| 121 | - $token = new |
|
| 122 | - HTMLPurifier_Token_Text( |
|
| 123 | - $this->parseData( |
|
| 124 | - substr( |
|
| 125 | - $html, $cursor, $position_next_lt - $cursor |
|
| 126 | - ) |
|
| 127 | - ) |
|
| 128 | - ); |
|
| 129 | - if ($maintain_line_numbers) { |
|
| 130 | - $token->rawPosition($current_line, $current_col); |
|
| 131 | - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); |
|
| 132 | - } |
|
| 133 | - $array[] = $token; |
|
| 134 | - $cursor = $position_next_lt + 1; |
|
| 135 | - $inside_tag = true; |
|
| 136 | - continue; |
|
| 137 | - } elseif (!$inside_tag) { |
|
| 138 | - // We are not inside tag but there are no more tags |
|
| 139 | - // If we're already at the end, break |
|
| 140 | - if ($cursor === strlen($html)) break; |
|
| 141 | - // Create Text of rest of string |
|
| 142 | - $token = new |
|
| 143 | - HTMLPurifier_Token_Text( |
|
| 144 | - $this->parseData( |
|
| 145 | - substr( |
|
| 146 | - $html, $cursor |
|
| 147 | - ) |
|
| 148 | - ) |
|
| 149 | - ); |
|
| 150 | - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
|
| 151 | - $array[] = $token; |
|
| 152 | - break; |
|
| 153 | - } elseif ($inside_tag && $position_next_gt !== false) { |
|
| 154 | - // We are in tag and it is well formed |
|
| 155 | - // Grab the internals of the tag |
|
| 156 | - $strlen_segment = $position_next_gt - $cursor; |
|
| 157 | - |
|
| 158 | - if ($strlen_segment < 1) { |
|
| 159 | - // there's nothing to process! |
|
| 160 | - $token = new HTMLPurifier_Token_Text('<'); |
|
| 161 | - $cursor++; |
|
| 162 | - continue; |
|
| 163 | - } |
|
| 164 | - |
|
| 165 | - $segment = substr($html, $cursor, $strlen_segment); |
|
| 166 | - |
|
| 167 | - if ($segment === false) { |
|
| 168 | - // somehow, we attempted to access beyond the end of |
|
| 169 | - // the string, defense-in-depth, reported by Nate Abele |
|
| 170 | - break; |
|
| 171 | - } |
|
| 172 | - |
|
| 173 | - // Check if it's a comment |
|
| 174 | - if ( |
|
| 175 | - substr($segment, 0, 3) === '!--' |
|
| 176 | - ) { |
|
| 177 | - // re-determine segment length, looking for --> |
|
| 178 | - $position_comment_end = strpos($html, '-->', $cursor); |
|
| 179 | - if ($position_comment_end === false) { |
|
| 180 | - // uh oh, we have a comment that extends to |
|
| 181 | - // infinity. Can't be helped: set comment |
|
| 182 | - // end position to end of string |
|
| 183 | - if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); |
|
| 184 | - $position_comment_end = strlen($html); |
|
| 185 | - $end = true; |
|
| 186 | - } else { |
|
| 187 | - $end = false; |
|
| 188 | - } |
|
| 189 | - $strlen_segment = $position_comment_end - $cursor; |
|
| 190 | - $segment = substr($html, $cursor, $strlen_segment); |
|
| 191 | - $token = new |
|
| 192 | - HTMLPurifier_Token_Comment( |
|
| 193 | - substr( |
|
| 194 | - $segment, 3, $strlen_segment - 3 |
|
| 195 | - ) |
|
| 196 | - ); |
|
| 197 | - if ($maintain_line_numbers) { |
|
| 198 | - $token->rawPosition($current_line, $current_col); |
|
| 199 | - $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); |
|
| 200 | - } |
|
| 201 | - $array[] = $token; |
|
| 202 | - $cursor = $end ? $position_comment_end : $position_comment_end + 3; |
|
| 203 | - $inside_tag = false; |
|
| 204 | - continue; |
|
| 205 | - } |
|
| 206 | - |
|
| 207 | - // Check if it's an end tag |
|
| 208 | - $is_end_tag = (strpos($segment,'/') === 0); |
|
| 209 | - if ($is_end_tag) { |
|
| 210 | - $type = substr($segment, 1); |
|
| 211 | - $token = new HTMLPurifier_Token_End($type); |
|
| 212 | - if ($maintain_line_numbers) { |
|
| 213 | - $token->rawPosition($current_line, $current_col); |
|
| 214 | - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 215 | - } |
|
| 216 | - $array[] = $token; |
|
| 217 | - $inside_tag = false; |
|
| 218 | - $cursor = $position_next_gt + 1; |
|
| 219 | - continue; |
|
| 220 | - } |
|
| 221 | - |
|
| 222 | - // Check leading character is alnum, if not, we may |
|
| 223 | - // have accidently grabbed an emoticon. Translate into |
|
| 224 | - // text and go our merry way |
|
| 225 | - if (!ctype_alpha($segment[0])) { |
|
| 226 | - // XML: $segment[0] !== '_' && $segment[0] !== ':' |
|
| 227 | - if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); |
|
| 228 | - $token = new HTMLPurifier_Token_Text('<'); |
|
| 229 | - if ($maintain_line_numbers) { |
|
| 230 | - $token->rawPosition($current_line, $current_col); |
|
| 231 | - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 232 | - } |
|
| 233 | - $array[] = $token; |
|
| 234 | - $inside_tag = false; |
|
| 235 | - continue; |
|
| 236 | - } |
|
| 237 | - |
|
| 238 | - // Check if it is explicitly self closing, if so, remove |
|
| 239 | - // trailing slash. Remember, we could have a tag like <br>, so |
|
| 240 | - // any later token processing scripts must convert improperly |
|
| 241 | - // classified EmptyTags from StartTags. |
|
| 242 | - $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); |
|
| 243 | - if ($is_self_closing) { |
|
| 244 | - $strlen_segment--; |
|
| 245 | - $segment = substr($segment, 0, $strlen_segment); |
|
| 246 | - } |
|
| 247 | - |
|
| 248 | - // Check if there are any attributes |
|
| 249 | - $position_first_space = strcspn($segment, $this->_whitespace); |
|
| 250 | - |
|
| 251 | - if ($position_first_space >= $strlen_segment) { |
|
| 252 | - if ($is_self_closing) { |
|
| 253 | - $token = new HTMLPurifier_Token_Empty($segment); |
|
| 254 | - } else { |
|
| 255 | - $token = new HTMLPurifier_Token_Start($segment); |
|
| 256 | - } |
|
| 257 | - if ($maintain_line_numbers) { |
|
| 258 | - $token->rawPosition($current_line, $current_col); |
|
| 259 | - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 260 | - } |
|
| 261 | - $array[] = $token; |
|
| 262 | - $inside_tag = false; |
|
| 263 | - $cursor = $position_next_gt + 1; |
|
| 264 | - continue; |
|
| 265 | - } |
|
| 266 | - |
|
| 267 | - // Grab out all the data |
|
| 268 | - $type = substr($segment, 0, $position_first_space); |
|
| 269 | - $attribute_string = |
|
| 270 | - trim( |
|
| 271 | - substr( |
|
| 272 | - $segment, $position_first_space |
|
| 273 | - ) |
|
| 274 | - ); |
|
| 275 | - if ($attribute_string) { |
|
| 276 | - $attr = $this->parseAttributeString( |
|
| 277 | - $attribute_string |
|
| 278 | - , $config, $context |
|
| 279 | - ); |
|
| 280 | - } else { |
|
| 281 | - $attr = array(); |
|
| 282 | - } |
|
| 283 | - |
|
| 284 | - if ($is_self_closing) { |
|
| 285 | - $token = new HTMLPurifier_Token_Empty($type, $attr); |
|
| 286 | - } else { |
|
| 287 | - $token = new HTMLPurifier_Token_Start($type, $attr); |
|
| 288 | - } |
|
| 289 | - if ($maintain_line_numbers) { |
|
| 290 | - $token->rawPosition($current_line, $current_col); |
|
| 291 | - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 292 | - } |
|
| 293 | - $array[] = $token; |
|
| 294 | - $cursor = $position_next_gt + 1; |
|
| 295 | - $inside_tag = false; |
|
| 296 | - continue; |
|
| 297 | - } else { |
|
| 298 | - // inside tag, but there's no ending > sign |
|
| 299 | - if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); |
|
| 300 | - $token = new |
|
| 301 | - HTMLPurifier_Token_Text( |
|
| 302 | - '<' . |
|
| 303 | - $this->parseData( |
|
| 304 | - substr($html, $cursor) |
|
| 305 | - ) |
|
| 306 | - ); |
|
| 307 | - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
|
| 308 | - // no cursor scroll? Hmm... |
|
| 309 | - $array[] = $token; |
|
| 310 | - break; |
|
| 311 | - } |
|
| 312 | - break; |
|
| 313 | - } |
|
| 314 | - |
|
| 315 | - $context->destroy('CurrentLine'); |
|
| 316 | - $context->destroy('CurrentCol'); |
|
| 317 | - return $array; |
|
| 318 | - } |
|
| 319 | - |
|
| 320 | - /** |
|
| 321 | - * PHP 5.0.x compatible substr_count that implements offset and length |
|
| 322 | - */ |
|
| 323 | - protected function substrCount($haystack, $needle, $offset, $length) { |
|
| 324 | - static $oldVersion; |
|
| 325 | - if ($oldVersion === null) { |
|
| 326 | - $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); |
|
| 327 | - } |
|
| 328 | - if ($oldVersion) { |
|
| 329 | - $haystack = substr($haystack, $offset, $length); |
|
| 330 | - return substr_count($haystack, $needle); |
|
| 331 | - } else { |
|
| 332 | - return substr_count($haystack, $needle, $offset, $length); |
|
| 333 | - } |
|
| 334 | - } |
|
| 335 | - |
|
| 336 | - /** |
|
| 337 | - * Takes the inside of an HTML tag and makes an assoc array of attributes. |
|
| 338 | - * |
|
| 339 | - * @param $string Inside of tag excluding name. |
|
| 340 | - * @returns Assoc array of attributes. |
|
| 341 | - */ |
|
| 342 | - public function parseAttributeString($string, $config, $context) { |
|
| 343 | - $string = (string) $string; // quick typecast |
|
| 344 | - |
|
| 345 | - if ($string == '') return array(); // no attributes |
|
| 346 | - |
|
| 347 | - $e = false; |
|
| 348 | - if ($config->get('Core.CollectErrors')) { |
|
| 349 | - $e =& $context->get('ErrorCollector'); |
|
| 350 | - } |
|
| 351 | - |
|
| 352 | - // let's see if we can abort as quickly as possible |
|
| 353 | - // one equal sign, no spaces => one attribute |
|
| 354 | - $num_equal = substr_count($string, '='); |
|
| 355 | - $has_space = strpos($string, ' '); |
|
| 356 | - if ($num_equal === 0 && !$has_space) { |
|
| 357 | - // bool attribute |
|
| 358 | - return array($string => $string); |
|
| 359 | - } elseif ($num_equal === 1 && !$has_space) { |
|
| 360 | - // only one attribute |
|
| 361 | - list($key, $quoted_value) = explode('=', $string); |
|
| 362 | - $quoted_value = trim($quoted_value); |
|
| 363 | - if (!$key) { |
|
| 364 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 365 | - return array(); |
|
| 366 | - } |
|
| 367 | - if (!$quoted_value) return array($key => ''); |
|
| 368 | - $first_char = @$quoted_value[0]; |
|
| 369 | - $last_char = @$quoted_value[strlen($quoted_value)-1]; |
|
| 370 | - |
|
| 371 | - $same_quote = ($first_char == $last_char); |
|
| 372 | - $open_quote = ($first_char == '"' || $first_char == "'"); |
|
| 373 | - |
|
| 374 | - if ( $same_quote && $open_quote) { |
|
| 375 | - // well behaved |
|
| 376 | - $value = substr($quoted_value, 1, strlen($quoted_value) - 2); |
|
| 377 | - } else { |
|
| 378 | - // not well behaved |
|
| 379 | - if ($open_quote) { |
|
| 380 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); |
|
| 381 | - $value = substr($quoted_value, 1); |
|
| 382 | - } else { |
|
| 383 | - $value = $quoted_value; |
|
| 384 | - } |
|
| 385 | - } |
|
| 386 | - if ($value === false) $value = ''; |
|
| 387 | - return array($key => $this->parseData($value)); |
|
| 388 | - } |
|
| 389 | - |
|
| 390 | - // setup loop environment |
|
| 391 | - $array = array(); // return assoc array of attributes |
|
| 392 | - $cursor = 0; // current position in string (moves forward) |
|
| 393 | - $size = strlen($string); // size of the string (stays the same) |
|
| 394 | - |
|
| 395 | - // if we have unquoted attributes, the parser expects a terminating |
|
| 396 | - // space, so let's guarantee that there's always a terminating space. |
|
| 397 | - $string .= ' '; |
|
| 398 | - |
|
| 399 | - while(true) { |
|
| 400 | - |
|
| 401 | - if ($cursor >= $size) { |
|
| 402 | - break; |
|
| 403 | - } |
|
| 404 | - |
|
| 405 | - $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); |
|
| 406 | - // grab the key |
|
| 407 | - |
|
| 408 | - $key_begin = $cursor; //we're currently at the start of the key |
|
| 409 | - |
|
| 410 | - // scroll past all characters that are the key (not whitespace or =) |
|
| 411 | - $cursor += strcspn($string, $this->_whitespace . '=', $cursor); |
|
| 412 | - |
|
| 413 | - $key_end = $cursor; // now at the end of the key |
|
| 414 | - |
|
| 415 | - $key = substr($string, $key_begin, $key_end - $key_begin); |
|
| 416 | - |
|
| 417 | - if (!$key) { |
|
| 418 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 419 | - $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop |
|
| 420 | - continue; // empty key |
|
| 421 | - } |
|
| 422 | - |
|
| 423 | - // scroll past all whitespace |
|
| 424 | - $cursor += strspn($string, $this->_whitespace, $cursor); |
|
| 425 | - |
|
| 426 | - if ($cursor >= $size) { |
|
| 427 | - $array[$key] = $key; |
|
| 428 | - break; |
|
| 429 | - } |
|
| 430 | - |
|
| 431 | - // if the next character is an equal sign, we've got a regular |
|
| 432 | - // pair, otherwise, it's a bool attribute |
|
| 433 | - $first_char = @$string[$cursor]; |
|
| 434 | - |
|
| 435 | - if ($first_char == '=') { |
|
| 436 | - // key="value" |
|
| 437 | - |
|
| 438 | - $cursor++; |
|
| 439 | - $cursor += strspn($string, $this->_whitespace, $cursor); |
|
| 440 | - |
|
| 441 | - if ($cursor === false) { |
|
| 442 | - $array[$key] = ''; |
|
| 443 | - break; |
|
| 444 | - } |
|
| 445 | - |
|
| 446 | - // we might be in front of a quote right now |
|
| 447 | - |
|
| 448 | - $char = @$string[$cursor]; |
|
| 449 | - |
|
| 450 | - if ($char == '"' || $char == "'") { |
|
| 451 | - // it's quoted, end bound is $char |
|
| 452 | - $cursor++; |
|
| 453 | - $value_begin = $cursor; |
|
| 454 | - $cursor = strpos($string, $char, $cursor); |
|
| 455 | - $value_end = $cursor; |
|
| 456 | - } else { |
|
| 457 | - // it's not quoted, end bound is whitespace |
|
| 458 | - $value_begin = $cursor; |
|
| 459 | - $cursor += strcspn($string, $this->_whitespace, $cursor); |
|
| 460 | - $value_end = $cursor; |
|
| 461 | - } |
|
| 462 | - |
|
| 463 | - // we reached a premature end |
|
| 464 | - if ($cursor === false) { |
|
| 465 | - $cursor = $size; |
|
| 466 | - $value_end = $cursor; |
|
| 467 | - } |
|
| 468 | - |
|
| 469 | - $value = substr($string, $value_begin, $value_end - $value_begin); |
|
| 470 | - if ($value === false) $value = ''; |
|
| 471 | - $array[$key] = $this->parseData($value); |
|
| 472 | - $cursor++; |
|
| 473 | - |
|
| 474 | - } else { |
|
| 475 | - // boolattr |
|
| 476 | - if ($key !== '') { |
|
| 477 | - $array[$key] = $key; |
|
| 478 | - } else { |
|
| 479 | - // purely theoretical |
|
| 480 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 481 | - } |
|
| 482 | - |
|
| 483 | - } |
|
| 484 | - } |
|
| 485 | - return $array; |
|
| 486 | - } |
|
| 16 | + public $tracksLineNumbers = true; |
|
| 17 | + |
|
| 18 | + /** |
|
| 19 | + * Whitespace characters for str(c)spn. |
|
| 20 | + */ |
|
| 21 | + protected $_whitespace = "\x20\x09\x0D\x0A"; |
|
| 22 | + |
|
| 23 | + /** |
|
| 24 | + * Callback function for script CDATA fudge |
|
| 25 | + * @param $matches, in form of array(opening tag, contents, closing tag) |
|
| 26 | + */ |
|
| 27 | + protected function scriptCallback($matches) { |
|
| 28 | + return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3]; |
|
| 29 | + } |
|
| 30 | + |
|
| 31 | + public function tokenizeHTML($html, $config, $context) { |
|
| 32 | + |
|
| 33 | + // special normalization for script tags without any armor |
|
| 34 | + // our "armor" heurstic is a < sign any number of whitespaces after |
|
| 35 | + // the first script tag |
|
| 36 | + if ($config->get('HTML.Trusted')) { |
|
| 37 | + $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si', |
|
| 38 | + array($this, 'scriptCallback'), $html); |
|
| 39 | + } |
|
| 40 | + |
|
| 41 | + $html = $this->normalize($html, $config, $context); |
|
| 42 | + |
|
| 43 | + $cursor = 0; // our location in the text |
|
| 44 | + $inside_tag = false; // whether or not we're parsing the inside of a tag |
|
| 45 | + $array = array(); // result array |
|
| 46 | + |
|
| 47 | + // This is also treated to mean maintain *column* numbers too |
|
| 48 | + $maintain_line_numbers = $config->get('Core.MaintainLineNumbers'); |
|
| 49 | + |
|
| 50 | + if ($maintain_line_numbers === null) { |
|
| 51 | + // automatically determine line numbering by checking |
|
| 52 | + // if error collection is on |
|
| 53 | + $maintain_line_numbers = $config->get('Core.CollectErrors'); |
|
| 54 | + } |
|
| 55 | + |
|
| 56 | + if ($maintain_line_numbers) { |
|
| 57 | + $current_line = 1; |
|
| 58 | + $current_col = 0; |
|
| 59 | + $length = strlen($html); |
|
| 60 | + } else { |
|
| 61 | + $current_line = false; |
|
| 62 | + $current_col = false; |
|
| 63 | + $length = false; |
|
| 64 | + } |
|
| 65 | + $context->register('CurrentLine', $current_line); |
|
| 66 | + $context->register('CurrentCol', $current_col); |
|
| 67 | + $nl = "\n"; |
|
| 68 | + // how often to manually recalculate. This will ALWAYS be right, |
|
| 69 | + // but it's pretty wasteful. Set to 0 to turn off |
|
| 70 | + $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval'); |
|
| 71 | + |
|
| 72 | + $e = false; |
|
| 73 | + if ($config->get('Core.CollectErrors')) { |
|
| 74 | + $e =& $context->get('ErrorCollector'); |
|
| 75 | + } |
|
| 76 | + |
|
| 77 | + // for testing synchronization |
|
| 78 | + $loops = 0; |
|
| 79 | + |
|
| 80 | + while(++$loops) { |
|
| 81 | + |
|
| 82 | + // $cursor is either at the start of a token, or inside of |
|
| 83 | + // a tag (i.e. there was a < immediately before it), as indicated |
|
| 84 | + // by $inside_tag |
|
| 85 | + |
|
| 86 | + if ($maintain_line_numbers) { |
|
| 87 | + |
|
| 88 | + // $rcursor, however, is always at the start of a token. |
|
| 89 | + $rcursor = $cursor - (int) $inside_tag; |
|
| 90 | + |
|
| 91 | + // Column number is cheap, so we calculate it every round. |
|
| 92 | + // We're interested at the *end* of the newline string, so |
|
| 93 | + // we need to add strlen($nl) == 1 to $nl_pos before subtracting it |
|
| 94 | + // from our "rcursor" position. |
|
| 95 | + $nl_pos = strrpos($html, $nl, $rcursor - $length); |
|
| 96 | + $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); |
|
| 97 | + |
|
| 98 | + // recalculate lines |
|
| 99 | + if ( |
|
| 100 | + $synchronize_interval && // synchronization is on |
|
| 101 | + $cursor > 0 && // cursor is further than zero |
|
| 102 | + $loops % $synchronize_interval === 0 // time to synchronize! |
|
| 103 | + ) { |
|
| 104 | + $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); |
|
| 105 | + } |
|
| 106 | + |
|
| 107 | + } |
|
| 108 | + |
|
| 109 | + $position_next_lt = strpos($html, '<', $cursor); |
|
| 110 | + $position_next_gt = strpos($html, '>', $cursor); |
|
| 111 | + |
|
| 112 | + // triggers on "<b>asdf</b>" but not "asdf <b></b>" |
|
| 113 | + // special case to set up context |
|
| 114 | + if ($position_next_lt === $cursor) { |
|
| 115 | + $inside_tag = true; |
|
| 116 | + $cursor++; |
|
| 117 | + } |
|
| 118 | + |
|
| 119 | + if (!$inside_tag && $position_next_lt !== false) { |
|
| 120 | + // We are not inside tag and there still is another tag to parse |
|
| 121 | + $token = new |
|
| 122 | + HTMLPurifier_Token_Text( |
|
| 123 | + $this->parseData( |
|
| 124 | + substr( |
|
| 125 | + $html, $cursor, $position_next_lt - $cursor |
|
| 126 | + ) |
|
| 127 | + ) |
|
| 128 | + ); |
|
| 129 | + if ($maintain_line_numbers) { |
|
| 130 | + $token->rawPosition($current_line, $current_col); |
|
| 131 | + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); |
|
| 132 | + } |
|
| 133 | + $array[] = $token; |
|
| 134 | + $cursor = $position_next_lt + 1; |
|
| 135 | + $inside_tag = true; |
|
| 136 | + continue; |
|
| 137 | + } elseif (!$inside_tag) { |
|
| 138 | + // We are not inside tag but there are no more tags |
|
| 139 | + // If we're already at the end, break |
|
| 140 | + if ($cursor === strlen($html)) break; |
|
| 141 | + // Create Text of rest of string |
|
| 142 | + $token = new |
|
| 143 | + HTMLPurifier_Token_Text( |
|
| 144 | + $this->parseData( |
|
| 145 | + substr( |
|
| 146 | + $html, $cursor |
|
| 147 | + ) |
|
| 148 | + ) |
|
| 149 | + ); |
|
| 150 | + if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
|
| 151 | + $array[] = $token; |
|
| 152 | + break; |
|
| 153 | + } elseif ($inside_tag && $position_next_gt !== false) { |
|
| 154 | + // We are in tag and it is well formed |
|
| 155 | + // Grab the internals of the tag |
|
| 156 | + $strlen_segment = $position_next_gt - $cursor; |
|
| 157 | + |
|
| 158 | + if ($strlen_segment < 1) { |
|
| 159 | + // there's nothing to process! |
|
| 160 | + $token = new HTMLPurifier_Token_Text('<'); |
|
| 161 | + $cursor++; |
|
| 162 | + continue; |
|
| 163 | + } |
|
| 164 | + |
|
| 165 | + $segment = substr($html, $cursor, $strlen_segment); |
|
| 166 | + |
|
| 167 | + if ($segment === false) { |
|
| 168 | + // somehow, we attempted to access beyond the end of |
|
| 169 | + // the string, defense-in-depth, reported by Nate Abele |
|
| 170 | + break; |
|
| 171 | + } |
|
| 172 | + |
|
| 173 | + // Check if it's a comment |
|
| 174 | + if ( |
|
| 175 | + substr($segment, 0, 3) === '!--' |
|
| 176 | + ) { |
|
| 177 | + // re-determine segment length, looking for --> |
|
| 178 | + $position_comment_end = strpos($html, '-->', $cursor); |
|
| 179 | + if ($position_comment_end === false) { |
|
| 180 | + // uh oh, we have a comment that extends to |
|
| 181 | + // infinity. Can't be helped: set comment |
|
| 182 | + // end position to end of string |
|
| 183 | + if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); |
|
| 184 | + $position_comment_end = strlen($html); |
|
| 185 | + $end = true; |
|
| 186 | + } else { |
|
| 187 | + $end = false; |
|
| 188 | + } |
|
| 189 | + $strlen_segment = $position_comment_end - $cursor; |
|
| 190 | + $segment = substr($html, $cursor, $strlen_segment); |
|
| 191 | + $token = new |
|
| 192 | + HTMLPurifier_Token_Comment( |
|
| 193 | + substr( |
|
| 194 | + $segment, 3, $strlen_segment - 3 |
|
| 195 | + ) |
|
| 196 | + ); |
|
| 197 | + if ($maintain_line_numbers) { |
|
| 198 | + $token->rawPosition($current_line, $current_col); |
|
| 199 | + $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); |
|
| 200 | + } |
|
| 201 | + $array[] = $token; |
|
| 202 | + $cursor = $end ? $position_comment_end : $position_comment_end + 3; |
|
| 203 | + $inside_tag = false; |
|
| 204 | + continue; |
|
| 205 | + } |
|
| 206 | + |
|
| 207 | + // Check if it's an end tag |
|
| 208 | + $is_end_tag = (strpos($segment,'/') === 0); |
|
| 209 | + if ($is_end_tag) { |
|
| 210 | + $type = substr($segment, 1); |
|
| 211 | + $token = new HTMLPurifier_Token_End($type); |
|
| 212 | + if ($maintain_line_numbers) { |
|
| 213 | + $token->rawPosition($current_line, $current_col); |
|
| 214 | + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 215 | + } |
|
| 216 | + $array[] = $token; |
|
| 217 | + $inside_tag = false; |
|
| 218 | + $cursor = $position_next_gt + 1; |
|
| 219 | + continue; |
|
| 220 | + } |
|
| 221 | + |
|
| 222 | + // Check leading character is alnum, if not, we may |
|
| 223 | + // have accidently grabbed an emoticon. Translate into |
|
| 224 | + // text and go our merry way |
|
| 225 | + if (!ctype_alpha($segment[0])) { |
|
| 226 | + // XML: $segment[0] !== '_' && $segment[0] !== ':' |
|
| 227 | + if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); |
|
| 228 | + $token = new HTMLPurifier_Token_Text('<'); |
|
| 229 | + if ($maintain_line_numbers) { |
|
| 230 | + $token->rawPosition($current_line, $current_col); |
|
| 231 | + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 232 | + } |
|
| 233 | + $array[] = $token; |
|
| 234 | + $inside_tag = false; |
|
| 235 | + continue; |
|
| 236 | + } |
|
| 237 | + |
|
| 238 | + // Check if it is explicitly self closing, if so, remove |
|
| 239 | + // trailing slash. Remember, we could have a tag like <br>, so |
|
| 240 | + // any later token processing scripts must convert improperly |
|
| 241 | + // classified EmptyTags from StartTags. |
|
| 242 | + $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); |
|
| 243 | + if ($is_self_closing) { |
|
| 244 | + $strlen_segment--; |
|
| 245 | + $segment = substr($segment, 0, $strlen_segment); |
|
| 246 | + } |
|
| 247 | + |
|
| 248 | + // Check if there are any attributes |
|
| 249 | + $position_first_space = strcspn($segment, $this->_whitespace); |
|
| 250 | + |
|
| 251 | + if ($position_first_space >= $strlen_segment) { |
|
| 252 | + if ($is_self_closing) { |
|
| 253 | + $token = new HTMLPurifier_Token_Empty($segment); |
|
| 254 | + } else { |
|
| 255 | + $token = new HTMLPurifier_Token_Start($segment); |
|
| 256 | + } |
|
| 257 | + if ($maintain_line_numbers) { |
|
| 258 | + $token->rawPosition($current_line, $current_col); |
|
| 259 | + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 260 | + } |
|
| 261 | + $array[] = $token; |
|
| 262 | + $inside_tag = false; |
|
| 263 | + $cursor = $position_next_gt + 1; |
|
| 264 | + continue; |
|
| 265 | + } |
|
| 266 | + |
|
| 267 | + // Grab out all the data |
|
| 268 | + $type = substr($segment, 0, $position_first_space); |
|
| 269 | + $attribute_string = |
|
| 270 | + trim( |
|
| 271 | + substr( |
|
| 272 | + $segment, $position_first_space |
|
| 273 | + ) |
|
| 274 | + ); |
|
| 275 | + if ($attribute_string) { |
|
| 276 | + $attr = $this->parseAttributeString( |
|
| 277 | + $attribute_string |
|
| 278 | + , $config, $context |
|
| 279 | + ); |
|
| 280 | + } else { |
|
| 281 | + $attr = array(); |
|
| 282 | + } |
|
| 283 | + |
|
| 284 | + if ($is_self_closing) { |
|
| 285 | + $token = new HTMLPurifier_Token_Empty($type, $attr); |
|
| 286 | + } else { |
|
| 287 | + $token = new HTMLPurifier_Token_Start($type, $attr); |
|
| 288 | + } |
|
| 289 | + if ($maintain_line_numbers) { |
|
| 290 | + $token->rawPosition($current_line, $current_col); |
|
| 291 | + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); |
|
| 292 | + } |
|
| 293 | + $array[] = $token; |
|
| 294 | + $cursor = $position_next_gt + 1; |
|
| 295 | + $inside_tag = false; |
|
| 296 | + continue; |
|
| 297 | + } else { |
|
| 298 | + // inside tag, but there's no ending > sign |
|
| 299 | + if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); |
|
| 300 | + $token = new |
|
| 301 | + HTMLPurifier_Token_Text( |
|
| 302 | + '<' . |
|
| 303 | + $this->parseData( |
|
| 304 | + substr($html, $cursor) |
|
| 305 | + ) |
|
| 306 | + ); |
|
| 307 | + if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
|
| 308 | + // no cursor scroll? Hmm... |
|
| 309 | + $array[] = $token; |
|
| 310 | + break; |
|
| 311 | + } |
|
| 312 | + break; |
|
| 313 | + } |
|
| 314 | + |
|
| 315 | + $context->destroy('CurrentLine'); |
|
| 316 | + $context->destroy('CurrentCol'); |
|
| 317 | + return $array; |
|
| 318 | + } |
|
| 319 | + |
|
| 320 | + /** |
|
| 321 | + * PHP 5.0.x compatible substr_count that implements offset and length |
|
| 322 | + */ |
|
| 323 | + protected function substrCount($haystack, $needle, $offset, $length) { |
|
| 324 | + static $oldVersion; |
|
| 325 | + if ($oldVersion === null) { |
|
| 326 | + $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); |
|
| 327 | + } |
|
| 328 | + if ($oldVersion) { |
|
| 329 | + $haystack = substr($haystack, $offset, $length); |
|
| 330 | + return substr_count($haystack, $needle); |
|
| 331 | + } else { |
|
| 332 | + return substr_count($haystack, $needle, $offset, $length); |
|
| 333 | + } |
|
| 334 | + } |
|
| 335 | + |
|
| 336 | + /** |
|
| 337 | + * Takes the inside of an HTML tag and makes an assoc array of attributes. |
|
| 338 | + * |
|
| 339 | + * @param $string Inside of tag excluding name. |
|
| 340 | + * @returns Assoc array of attributes. |
|
| 341 | + */ |
|
| 342 | + public function parseAttributeString($string, $config, $context) { |
|
| 343 | + $string = (string) $string; // quick typecast |
|
| 344 | + |
|
| 345 | + if ($string == '') return array(); // no attributes |
|
| 346 | + |
|
| 347 | + $e = false; |
|
| 348 | + if ($config->get('Core.CollectErrors')) { |
|
| 349 | + $e =& $context->get('ErrorCollector'); |
|
| 350 | + } |
|
| 351 | + |
|
| 352 | + // let's see if we can abort as quickly as possible |
|
| 353 | + // one equal sign, no spaces => one attribute |
|
| 354 | + $num_equal = substr_count($string, '='); |
|
| 355 | + $has_space = strpos($string, ' '); |
|
| 356 | + if ($num_equal === 0 && !$has_space) { |
|
| 357 | + // bool attribute |
|
| 358 | + return array($string => $string); |
|
| 359 | + } elseif ($num_equal === 1 && !$has_space) { |
|
| 360 | + // only one attribute |
|
| 361 | + list($key, $quoted_value) = explode('=', $string); |
|
| 362 | + $quoted_value = trim($quoted_value); |
|
| 363 | + if (!$key) { |
|
| 364 | + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 365 | + return array(); |
|
| 366 | + } |
|
| 367 | + if (!$quoted_value) return array($key => ''); |
|
| 368 | + $first_char = @$quoted_value[0]; |
|
| 369 | + $last_char = @$quoted_value[strlen($quoted_value)-1]; |
|
| 370 | + |
|
| 371 | + $same_quote = ($first_char == $last_char); |
|
| 372 | + $open_quote = ($first_char == '"' || $first_char == "'"); |
|
| 373 | + |
|
| 374 | + if ( $same_quote && $open_quote) { |
|
| 375 | + // well behaved |
|
| 376 | + $value = substr($quoted_value, 1, strlen($quoted_value) - 2); |
|
| 377 | + } else { |
|
| 378 | + // not well behaved |
|
| 379 | + if ($open_quote) { |
|
| 380 | + if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); |
|
| 381 | + $value = substr($quoted_value, 1); |
|
| 382 | + } else { |
|
| 383 | + $value = $quoted_value; |
|
| 384 | + } |
|
| 385 | + } |
|
| 386 | + if ($value === false) $value = ''; |
|
| 387 | + return array($key => $this->parseData($value)); |
|
| 388 | + } |
|
| 389 | + |
|
| 390 | + // setup loop environment |
|
| 391 | + $array = array(); // return assoc array of attributes |
|
| 392 | + $cursor = 0; // current position in string (moves forward) |
|
| 393 | + $size = strlen($string); // size of the string (stays the same) |
|
| 394 | + |
|
| 395 | + // if we have unquoted attributes, the parser expects a terminating |
|
| 396 | + // space, so let's guarantee that there's always a terminating space. |
|
| 397 | + $string .= ' '; |
|
| 398 | + |
|
| 399 | + while(true) { |
|
| 400 | + |
|
| 401 | + if ($cursor >= $size) { |
|
| 402 | + break; |
|
| 403 | + } |
|
| 404 | + |
|
| 405 | + $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); |
|
| 406 | + // grab the key |
|
| 407 | + |
|
| 408 | + $key_begin = $cursor; //we're currently at the start of the key |
|
| 409 | + |
|
| 410 | + // scroll past all characters that are the key (not whitespace or =) |
|
| 411 | + $cursor += strcspn($string, $this->_whitespace . '=', $cursor); |
|
| 412 | + |
|
| 413 | + $key_end = $cursor; // now at the end of the key |
|
| 414 | + |
|
| 415 | + $key = substr($string, $key_begin, $key_end - $key_begin); |
|
| 416 | + |
|
| 417 | + if (!$key) { |
|
| 418 | + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 419 | + $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop |
|
| 420 | + continue; // empty key |
|
| 421 | + } |
|
| 422 | + |
|
| 423 | + // scroll past all whitespace |
|
| 424 | + $cursor += strspn($string, $this->_whitespace, $cursor); |
|
| 425 | + |
|
| 426 | + if ($cursor >= $size) { |
|
| 427 | + $array[$key] = $key; |
|
| 428 | + break; |
|
| 429 | + } |
|
| 430 | + |
|
| 431 | + // if the next character is an equal sign, we've got a regular |
|
| 432 | + // pair, otherwise, it's a bool attribute |
|
| 433 | + $first_char = @$string[$cursor]; |
|
| 434 | + |
|
| 435 | + if ($first_char == '=') { |
|
| 436 | + // key="value" |
|
| 437 | + |
|
| 438 | + $cursor++; |
|
| 439 | + $cursor += strspn($string, $this->_whitespace, $cursor); |
|
| 440 | + |
|
| 441 | + if ($cursor === false) { |
|
| 442 | + $array[$key] = ''; |
|
| 443 | + break; |
|
| 444 | + } |
|
| 445 | + |
|
| 446 | + // we might be in front of a quote right now |
|
| 447 | + |
|
| 448 | + $char = @$string[$cursor]; |
|
| 449 | + |
|
| 450 | + if ($char == '"' || $char == "'") { |
|
| 451 | + // it's quoted, end bound is $char |
|
| 452 | + $cursor++; |
|
| 453 | + $value_begin = $cursor; |
|
| 454 | + $cursor = strpos($string, $char, $cursor); |
|
| 455 | + $value_end = $cursor; |
|
| 456 | + } else { |
|
| 457 | + // it's not quoted, end bound is whitespace |
|
| 458 | + $value_begin = $cursor; |
|
| 459 | + $cursor += strcspn($string, $this->_whitespace, $cursor); |
|
| 460 | + $value_end = $cursor; |
|
| 461 | + } |
|
| 462 | + |
|
| 463 | + // we reached a premature end |
|
| 464 | + if ($cursor === false) { |
|
| 465 | + $cursor = $size; |
|
| 466 | + $value_end = $cursor; |
|
| 467 | + } |
|
| 468 | + |
|
| 469 | + $value = substr($string, $value_begin, $value_end - $value_begin); |
|
| 470 | + if ($value === false) $value = ''; |
|
| 471 | + $array[$key] = $this->parseData($value); |
|
| 472 | + $cursor++; |
|
| 473 | + |
|
| 474 | + } else { |
|
| 475 | + // boolattr |
|
| 476 | + if ($key !== '') { |
|
| 477 | + $array[$key] = $key; |
|
| 478 | + } else { |
|
| 479 | + // purely theoretical |
|
| 480 | + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 481 | + } |
|
| 482 | + |
|
| 483 | + } |
|
| 484 | + } |
|
| 485 | + return $array; |
|
| 486 | + } |
|
| 487 | 487 | |
| 488 | 488 | } |
| 489 | 489 | |
@@ -137,7 +137,9 @@ discard block |
||
| 137 | 137 | } elseif (!$inside_tag) { |
| 138 | 138 | // We are not inside tag but there are no more tags |
| 139 | 139 | // If we're already at the end, break |
| 140 | - if ($cursor === strlen($html)) break; |
|
| 140 | + if ($cursor === strlen($html)) { |
|
| 141 | + break; |
|
| 142 | + } |
|
| 141 | 143 | // Create Text of rest of string |
| 142 | 144 | $token = new |
| 143 | 145 | HTMLPurifier_Token_Text( |
@@ -147,7 +149,9 @@ discard block |
||
| 147 | 149 | ) |
| 148 | 150 | ) |
| 149 | 151 | ); |
| 150 | - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
|
| 152 | + if ($maintain_line_numbers) { |
|
| 153 | + $token->rawPosition($current_line, $current_col); |
|
| 154 | + } |
|
| 151 | 155 | $array[] = $token; |
| 152 | 156 | break; |
| 153 | 157 | } elseif ($inside_tag && $position_next_gt !== false) { |
@@ -180,7 +184,9 @@ discard block |
||
| 180 | 184 | // uh oh, we have a comment that extends to |
| 181 | 185 | // infinity. Can't be helped: set comment |
| 182 | 186 | // end position to end of string |
| 183 | - if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); |
|
| 187 | + if ($e) { |
|
| 188 | + $e->send(E_WARNING, 'Lexer: Unclosed comment'); |
|
| 189 | + } |
|
| 184 | 190 | $position_comment_end = strlen($html); |
| 185 | 191 | $end = true; |
| 186 | 192 | } else { |
@@ -224,7 +230,9 @@ discard block |
||
| 224 | 230 | // text and go our merry way |
| 225 | 231 | if (!ctype_alpha($segment[0])) { |
| 226 | 232 | // XML: $segment[0] !== '_' && $segment[0] !== ':' |
| 227 | - if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); |
|
| 233 | + if ($e) { |
|
| 234 | + $e->send(E_NOTICE, 'Lexer: Unescaped lt'); |
|
| 235 | + } |
|
| 228 | 236 | $token = new HTMLPurifier_Token_Text('<'); |
| 229 | 237 | if ($maintain_line_numbers) { |
| 230 | 238 | $token->rawPosition($current_line, $current_col); |
@@ -296,7 +304,9 @@ discard block |
||
| 296 | 304 | continue; |
| 297 | 305 | } else { |
| 298 | 306 | // inside tag, but there's no ending > sign |
| 299 | - if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); |
|
| 307 | + if ($e) { |
|
| 308 | + $e->send(E_WARNING, 'Lexer: Missing gt'); |
|
| 309 | + } |
|
| 300 | 310 | $token = new |
| 301 | 311 | HTMLPurifier_Token_Text( |
| 302 | 312 | '<' . |
@@ -304,7 +314,9 @@ discard block |
||
| 304 | 314 | substr($html, $cursor) |
| 305 | 315 | ) |
| 306 | 316 | ); |
| 307 | - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); |
|
| 317 | + if ($maintain_line_numbers) { |
|
| 318 | + $token->rawPosition($current_line, $current_col); |
|
| 319 | + } |
|
| 308 | 320 | // no cursor scroll? Hmm... |
| 309 | 321 | $array[] = $token; |
| 310 | 322 | break; |
@@ -342,7 +354,10 @@ discard block |
||
| 342 | 354 | public function parseAttributeString($string, $config, $context) { |
| 343 | 355 | $string = (string) $string; // quick typecast |
| 344 | 356 | |
| 345 | - if ($string == '') return array(); // no attributes |
|
| 357 | + if ($string == '') { |
|
| 358 | + return array(); |
|
| 359 | + } |
|
| 360 | + // no attributes |
|
| 346 | 361 | |
| 347 | 362 | $e = false; |
| 348 | 363 | if ($config->get('Core.CollectErrors')) { |
@@ -361,10 +376,14 @@ discard block |
||
| 361 | 376 | list($key, $quoted_value) = explode('=', $string); |
| 362 | 377 | $quoted_value = trim($quoted_value); |
| 363 | 378 | if (!$key) { |
| 364 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 379 | + if ($e) { |
|
| 380 | + $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 381 | + } |
|
| 365 | 382 | return array(); |
| 366 | 383 | } |
| 367 | - if (!$quoted_value) return array($key => ''); |
|
| 384 | + if (!$quoted_value) { |
|
| 385 | + return array($key => ''); |
|
| 386 | + } |
|
| 368 | 387 | $first_char = @$quoted_value[0]; |
| 369 | 388 | $last_char = @$quoted_value[strlen($quoted_value)-1]; |
| 370 | 389 | |
@@ -377,13 +396,17 @@ discard block |
||
| 377 | 396 | } else { |
| 378 | 397 | // not well behaved |
| 379 | 398 | if ($open_quote) { |
| 380 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); |
|
| 399 | + if ($e) { |
|
| 400 | + $e->send(E_ERROR, 'Lexer: Missing end quote'); |
|
| 401 | + } |
|
| 381 | 402 | $value = substr($quoted_value, 1); |
| 382 | 403 | } else { |
| 383 | 404 | $value = $quoted_value; |
| 384 | 405 | } |
| 385 | 406 | } |
| 386 | - if ($value === false) $value = ''; |
|
| 407 | + if ($value === false) { |
|
| 408 | + $value = ''; |
|
| 409 | + } |
|
| 387 | 410 | return array($key => $this->parseData($value)); |
| 388 | 411 | } |
| 389 | 412 | |
@@ -415,7 +438,9 @@ discard block |
||
| 415 | 438 | $key = substr($string, $key_begin, $key_end - $key_begin); |
| 416 | 439 | |
| 417 | 440 | if (!$key) { |
| 418 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 441 | + if ($e) { |
|
| 442 | + $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 443 | + } |
|
| 419 | 444 | $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop |
| 420 | 445 | continue; // empty key |
| 421 | 446 | } |
@@ -467,7 +492,9 @@ discard block |
||
| 467 | 492 | } |
| 468 | 493 | |
| 469 | 494 | $value = substr($string, $value_begin, $value_end - $value_begin); |
| 470 | - if ($value === false) $value = ''; |
|
| 495 | + if ($value === false) { |
|
| 496 | + $value = ''; |
|
| 497 | + } |
|
| 471 | 498 | $array[$key] = $this->parseData($value); |
| 472 | 499 | $cursor++; |
| 473 | 500 | |
@@ -477,7 +504,9 @@ discard block |
||
| 477 | 504 | $array[$key] = $key; |
| 478 | 505 | } else { |
| 479 | 506 | // purely theoretical |
| 480 | - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 507 | + if ($e) { |
|
| 508 | + $e->send(E_ERROR, 'Lexer: Missing attribute key'); |
|
| 509 | + } |
|
| 481 | 510 | } |
| 482 | 511 | |
| 483 | 512 | } |
@@ -25,7 +25,7 @@ discard block |
||
| 25 | 25 | * @param $matches, in form of array(opening tag, contents, closing tag) |
| 26 | 26 | */ |
| 27 | 27 | protected function scriptCallback($matches) { |
| 28 | - return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3]; |
|
| 28 | + return $matches[1].htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false).$matches[3]; |
|
| 29 | 29 | } |
| 30 | 30 | |
| 31 | 31 | public function tokenizeHTML($html, $config, $context) { |
@@ -63,7 +63,7 @@ discard block |
||
| 63 | 63 | $length = false; |
| 64 | 64 | } |
| 65 | 65 | $context->register('CurrentLine', $current_line); |
| 66 | - $context->register('CurrentCol', $current_col); |
|
| 66 | + $context->register('CurrentCol', $current_col); |
|
| 67 | 67 | $nl = "\n"; |
| 68 | 68 | // how often to manually recalculate. This will ALWAYS be right, |
| 69 | 69 | // but it's pretty wasteful. Set to 0 to turn off |
@@ -71,13 +71,13 @@ discard block |
||
| 71 | 71 | |
| 72 | 72 | $e = false; |
| 73 | 73 | if ($config->get('Core.CollectErrors')) { |
| 74 | - $e =& $context->get('ErrorCollector'); |
|
| 74 | + $e = & $context->get('ErrorCollector'); |
|
| 75 | 75 | } |
| 76 | 76 | |
| 77 | 77 | // for testing synchronization |
| 78 | 78 | $loops = 0; |
| 79 | 79 | |
| 80 | - while(++$loops) { |
|
| 80 | + while (++$loops) { |
|
| 81 | 81 | |
| 82 | 82 | // $cursor is either at the start of a token, or inside of |
| 83 | 83 | // a tag (i.e. there was a < immediately before it), as indicated |
@@ -97,8 +97,8 @@ discard block |
||
| 97 | 97 | |
| 98 | 98 | // recalculate lines |
| 99 | 99 | if ( |
| 100 | - $synchronize_interval && // synchronization is on |
|
| 101 | - $cursor > 0 && // cursor is further than zero |
|
| 100 | + $synchronize_interval && // synchronization is on |
|
| 101 | + $cursor > 0 && // cursor is further than zero |
|
| 102 | 102 | $loops % $synchronize_interval === 0 // time to synchronize! |
| 103 | 103 | ) { |
| 104 | 104 | $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); |
@@ -205,7 +205,7 @@ discard block |
||
| 205 | 205 | } |
| 206 | 206 | |
| 207 | 207 | // Check if it's an end tag |
| 208 | - $is_end_tag = (strpos($segment,'/') === 0); |
|
| 208 | + $is_end_tag = (strpos($segment, '/') === 0); |
|
| 209 | 209 | if ($is_end_tag) { |
| 210 | 210 | $type = substr($segment, 1); |
| 211 | 211 | $token = new HTMLPurifier_Token_End($type); |
@@ -239,7 +239,7 @@ discard block |
||
| 239 | 239 | // trailing slash. Remember, we could have a tag like <br>, so |
| 240 | 240 | // any later token processing scripts must convert improperly |
| 241 | 241 | // classified EmptyTags from StartTags. |
| 242 | - $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); |
|
| 242 | + $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1); |
|
| 243 | 243 | if ($is_self_closing) { |
| 244 | 244 | $strlen_segment--; |
| 245 | 245 | $segment = substr($segment, 0, $strlen_segment); |
@@ -299,7 +299,7 @@ discard block |
||
| 299 | 299 | if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); |
| 300 | 300 | $token = new |
| 301 | 301 | HTMLPurifier_Token_Text( |
| 302 | - '<' . |
|
| 302 | + '<'. |
|
| 303 | 303 | $this->parseData( |
| 304 | 304 | substr($html, $cursor) |
| 305 | 305 | ) |
@@ -346,7 +346,7 @@ discard block |
||
| 346 | 346 | |
| 347 | 347 | $e = false; |
| 348 | 348 | if ($config->get('Core.CollectErrors')) { |
| 349 | - $e =& $context->get('ErrorCollector'); |
|
| 349 | + $e = & $context->get('ErrorCollector'); |
|
| 350 | 350 | } |
| 351 | 351 | |
| 352 | 352 | // let's see if we can abort as quickly as possible |
@@ -366,12 +366,12 @@ discard block |
||
| 366 | 366 | } |
| 367 | 367 | if (!$quoted_value) return array($key => ''); |
| 368 | 368 | $first_char = @$quoted_value[0]; |
| 369 | - $last_char = @$quoted_value[strlen($quoted_value)-1]; |
|
| 369 | + $last_char = @$quoted_value[strlen($quoted_value) - 1]; |
|
| 370 | 370 | |
| 371 | 371 | $same_quote = ($first_char == $last_char); |
| 372 | 372 | $open_quote = ($first_char == '"' || $first_char == "'"); |
| 373 | 373 | |
| 374 | - if ( $same_quote && $open_quote) { |
|
| 374 | + if ($same_quote && $open_quote) { |
|
| 375 | 375 | // well behaved |
| 376 | 376 | $value = substr($quoted_value, 1, strlen($quoted_value) - 2); |
| 377 | 377 | } else { |
@@ -396,7 +396,7 @@ discard block |
||
| 396 | 396 | // space, so let's guarantee that there's always a terminating space. |
| 397 | 397 | $string .= ' '; |
| 398 | 398 | |
| 399 | - while(true) { |
|
| 399 | + while (true) { |
|
| 400 | 400 | |
| 401 | 401 | if ($cursor >= $size) { |
| 402 | 402 | break; |
@@ -408,7 +408,7 @@ discard block |
||
| 408 | 408 | $key_begin = $cursor; //we're currently at the start of the key |
| 409 | 409 | |
| 410 | 410 | // scroll past all characters that are the key (not whitespace or =) |
| 411 | - $cursor += strcspn($string, $this->_whitespace . '=', $cursor); |
|
| 411 | + $cursor += strcspn($string, $this->_whitespace.'=', $cursor); |
|
| 412 | 412 | |
| 413 | 413 | $key_end = $cursor; // now at the end of the key |
| 414 | 414 | |
@@ -111,7 +111,7 @@ |
||
| 111 | 111 | /** |
| 112 | 112 | * @param $node DOMNode to be tokenized. |
| 113 | 113 | * @param $tokens Array-list of already tokenized tokens. |
| 114 | - * @param $collect Says whether or start and close are collected, set to |
|
| 114 | + * @param boolean $collect Says whether or start and close are collected, set to |
|
| 115 | 115 | * false at first recursion because it's the implicit DIV |
| 116 | 116 | * tag you're dealing with. |
| 117 | 117 | * @returns bool if the token needs an endtoken |
@@ -27,216 +27,216 @@ |
||
| 27 | 27 | class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer |
| 28 | 28 | { |
| 29 | 29 | |
| 30 | - private $factory; |
|
| 31 | - |
|
| 32 | - public function __construct() { |
|
| 33 | - // setup the factory |
|
| 34 | - parent::__construct(); |
|
| 35 | - $this->factory = new HTMLPurifier_TokenFactory(); |
|
| 36 | - } |
|
| 37 | - |
|
| 38 | - public function tokenizeHTML($html, $config, $context) { |
|
| 39 | - |
|
| 40 | - $html = $this->normalize($html, $config, $context); |
|
| 41 | - |
|
| 42 | - // attempt to armor stray angled brackets that cannot possibly |
|
| 43 | - // form tags and thus are probably being used as emoticons |
|
| 44 | - if ($config->get('Core.AggressivelyFixLt')) { |
|
| 45 | - $char = '[^a-z!\/]'; |
|
| 46 | - $comment = "/<!--(.*?)(-->|\z)/is"; |
|
| 47 | - $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html); |
|
| 48 | - do { |
|
| 49 | - $old = $html; |
|
| 50 | - $html = preg_replace("/<($char)/i", '<\\1', $html); |
|
| 51 | - } while ($html !== $old); |
|
| 52 | - $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments |
|
| 53 | - } |
|
| 54 | - |
|
| 55 | - // preprocess html, essential for UTF-8 |
|
| 56 | - $html = $this->wrapHTML($html, $config, $context); |
|
| 57 | - |
|
| 58 | - $doc = new DOMDocument(); |
|
| 59 | - $doc->encoding = 'UTF-8'; // theoretically, the above has this covered |
|
| 60 | - |
|
| 61 | - set_error_handler(array($this, 'muteErrorHandler')); |
|
| 62 | - $doc->loadHTML($html); |
|
| 63 | - restore_error_handler(); |
|
| 64 | - |
|
| 65 | - $tokens = array(); |
|
| 66 | - $this->tokenizeDOM( |
|
| 67 | - $doc->getElementsByTagName('html')->item(0)-> // <html> |
|
| 68 | - getElementsByTagName('body')->item(0)-> // <body> |
|
| 69 | - getElementsByTagName('div')->item(0) // <div> |
|
| 70 | - , $tokens); |
|
| 71 | - return $tokens; |
|
| 72 | - } |
|
| 73 | - |
|
| 74 | - /** |
|
| 75 | - * Iterative function that tokenizes a node, putting it into an accumulator. |
|
| 76 | - * To iterate is human, to recurse divine - L. Peter Deutsch |
|
| 77 | - * @param $node DOMNode to be tokenized. |
|
| 78 | - * @param $tokens Array-list of already tokenized tokens. |
|
| 79 | - * @returns Tokens of node appended to previously passed tokens. |
|
| 80 | - */ |
|
| 81 | - protected function tokenizeDOM($node, &$tokens) { |
|
| 82 | - |
|
| 83 | - $level = 0; |
|
| 84 | - $nodes = array($level => array($node)); |
|
| 85 | - $closingNodes = array(); |
|
| 86 | - do { |
|
| 87 | - while (!empty($nodes[$level])) { |
|
| 88 | - $node = array_shift($nodes[$level]); // FIFO |
|
| 89 | - $collect = $level > 0 ? true : false; |
|
| 90 | - $needEndingTag = $this->createStartNode($node, $tokens, $collect); |
|
| 91 | - if ($needEndingTag) { |
|
| 92 | - $closingNodes[$level][] = $node; |
|
| 93 | - } |
|
| 94 | - if ($node->childNodes && $node->childNodes->length) { |
|
| 95 | - $level++; |
|
| 96 | - $nodes[$level] = array(); |
|
| 97 | - foreach ($node->childNodes as $childNode) { |
|
| 98 | - array_push($nodes[$level], $childNode); |
|
| 99 | - } |
|
| 100 | - } |
|
| 101 | - } |
|
| 102 | - $level--; |
|
| 103 | - if ($level && isset($closingNodes[$level])) { |
|
| 104 | - while($node = array_pop($closingNodes[$level])) { |
|
| 105 | - $this->createEndNode($node, $tokens); |
|
| 106 | - } |
|
| 107 | - } |
|
| 108 | - } while ($level > 0); |
|
| 109 | - } |
|
| 110 | - |
|
| 111 | - /** |
|
| 112 | - * @param $node DOMNode to be tokenized. |
|
| 113 | - * @param $tokens Array-list of already tokenized tokens. |
|
| 114 | - * @param $collect Says whether or start and close are collected, set to |
|
| 115 | - * false at first recursion because it's the implicit DIV |
|
| 116 | - * tag you're dealing with. |
|
| 117 | - * @returns bool if the token needs an endtoken |
|
| 118 | - */ |
|
| 119 | - protected function createStartNode($node, &$tokens, $collect) { |
|
| 120 | - // intercept non element nodes. WE MUST catch all of them, |
|
| 121 | - // but we're not getting the character reference nodes because |
|
| 122 | - // those should have been preprocessed |
|
| 123 | - if ($node->nodeType === XML_TEXT_NODE) { |
|
| 124 | - $tokens[] = $this->factory->createText($node->data); |
|
| 125 | - return false; |
|
| 126 | - } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { |
|
| 127 | - // undo libxml's special treatment of <script> and <style> tags |
|
| 128 | - $last = end($tokens); |
|
| 129 | - $data = $node->data; |
|
| 130 | - // (note $node->tagname is already normalized) |
|
| 131 | - if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) { |
|
| 132 | - $new_data = trim($data); |
|
| 133 | - if (substr($new_data, 0, 4) === '<!--') { |
|
| 134 | - $data = substr($new_data, 4); |
|
| 135 | - if (substr($data, -3) === '-->') { |
|
| 136 | - $data = substr($data, 0, -3); |
|
| 137 | - } else { |
|
| 138 | - // Highly suspicious! Not sure what to do... |
|
| 139 | - } |
|
| 140 | - } |
|
| 141 | - } |
|
| 142 | - $tokens[] = $this->factory->createText($this->parseData($data)); |
|
| 143 | - return false; |
|
| 144 | - } elseif ($node->nodeType === XML_COMMENT_NODE) { |
|
| 145 | - // this is code is only invoked for comments in script/style in versions |
|
| 146 | - // of libxml pre-2.6.28 (regular comments, of course, are still |
|
| 147 | - // handled regularly) |
|
| 148 | - $tokens[] = $this->factory->createComment($node->data); |
|
| 149 | - return false; |
|
| 150 | - } elseif ( |
|
| 151 | - // not-well tested: there may be other nodes we have to grab |
|
| 152 | - $node->nodeType !== XML_ELEMENT_NODE |
|
| 153 | - ) { |
|
| 154 | - return false; |
|
| 155 | - } |
|
| 156 | - |
|
| 157 | - $attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array(); |
|
| 158 | - |
|
| 159 | - // We still have to make sure that the element actually IS empty |
|
| 160 | - if (!$node->childNodes->length) { |
|
| 161 | - if ($collect) { |
|
| 162 | - $tokens[] = $this->factory->createEmpty($node->tagName, $attr); |
|
| 163 | - } |
|
| 164 | - return false; |
|
| 165 | - } else { |
|
| 166 | - if ($collect) { |
|
| 167 | - $tokens[] = $this->factory->createStart( |
|
| 168 | - $tag_name = $node->tagName, // somehow, it get's dropped |
|
| 169 | - $attr |
|
| 170 | - ); |
|
| 171 | - } |
|
| 172 | - return true; |
|
| 173 | - } |
|
| 174 | - } |
|
| 175 | - |
|
| 176 | - protected function createEndNode($node, &$tokens) { |
|
| 177 | - $tokens[] = $this->factory->createEnd($node->tagName); |
|
| 178 | - } |
|
| 179 | - |
|
| 180 | - |
|
| 181 | - /** |
|
| 182 | - * Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array. |
|
| 183 | - * |
|
| 184 | - * @param $attribute_list DOMNamedNodeMap of DOMAttr objects. |
|
| 185 | - * @returns Associative array of attributes. |
|
| 186 | - */ |
|
| 187 | - protected function transformAttrToAssoc($node_map) { |
|
| 188 | - // NamedNodeMap is documented very well, so we're using undocumented |
|
| 189 | - // features, namely, the fact that it implements Iterator and |
|
| 190 | - // has a ->length attribute |
|
| 191 | - if ($node_map->length === 0) return array(); |
|
| 192 | - $array = array(); |
|
| 193 | - foreach ($node_map as $attr) { |
|
| 194 | - $array[$attr->name] = $attr->value; |
|
| 195 | - } |
|
| 196 | - return $array; |
|
| 197 | - } |
|
| 198 | - |
|
| 199 | - /** |
|
| 200 | - * An error handler that mutes all errors |
|
| 201 | - */ |
|
| 202 | - public function muteErrorHandler($errno, $errstr) {} |
|
| 203 | - |
|
| 204 | - /** |
|
| 205 | - * Callback function for undoing escaping of stray angled brackets |
|
| 206 | - * in comments |
|
| 207 | - */ |
|
| 208 | - public function callbackUndoCommentSubst($matches) { |
|
| 209 | - return '<!--' . strtr($matches[1], array('&'=>'&','<'=>'<')) . $matches[2]; |
|
| 210 | - } |
|
| 211 | - |
|
| 212 | - /** |
|
| 213 | - * Callback function that entity-izes ampersands in comments so that |
|
| 214 | - * callbackUndoCommentSubst doesn't clobber them |
|
| 215 | - */ |
|
| 216 | - public function callbackArmorCommentEntities($matches) { |
|
| 217 | - return '<!--' . str_replace('&', '&', $matches[1]) . $matches[2]; |
|
| 218 | - } |
|
| 219 | - |
|
| 220 | - /** |
|
| 221 | - * Wraps an HTML fragment in the necessary HTML |
|
| 222 | - */ |
|
| 223 | - protected function wrapHTML($html, $config, $context) { |
|
| 224 | - $def = $config->getDefinition('HTML'); |
|
| 225 | - $ret = ''; |
|
| 226 | - |
|
| 227 | - if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) { |
|
| 228 | - $ret .= '<!DOCTYPE html '; |
|
| 229 | - if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" '; |
|
| 230 | - if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" '; |
|
| 231 | - $ret .= '>'; |
|
| 232 | - } |
|
| 233 | - |
|
| 234 | - $ret .= '<html><head>'; |
|
| 235 | - $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'; |
|
| 236 | - // No protection if $html contains a stray </div>! |
|
| 237 | - $ret .= '</head><body><div>'.$html.'</div></body></html>'; |
|
| 238 | - return $ret; |
|
| 239 | - } |
|
| 30 | + private $factory; |
|
| 31 | + |
|
| 32 | + public function __construct() { |
|
| 33 | + // setup the factory |
|
| 34 | + parent::__construct(); |
|
| 35 | + $this->factory = new HTMLPurifier_TokenFactory(); |
|
| 36 | + } |
|
| 37 | + |
|
| 38 | + public function tokenizeHTML($html, $config, $context) { |
|
| 39 | + |
|
| 40 | + $html = $this->normalize($html, $config, $context); |
|
| 41 | + |
|
| 42 | + // attempt to armor stray angled brackets that cannot possibly |
|
| 43 | + // form tags and thus are probably being used as emoticons |
|
| 44 | + if ($config->get('Core.AggressivelyFixLt')) { |
|
| 45 | + $char = '[^a-z!\/]'; |
|
| 46 | + $comment = "/<!--(.*?)(-->|\z)/is"; |
|
| 47 | + $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html); |
|
| 48 | + do { |
|
| 49 | + $old = $html; |
|
| 50 | + $html = preg_replace("/<($char)/i", '<\\1', $html); |
|
| 51 | + } while ($html !== $old); |
|
| 52 | + $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments |
|
| 53 | + } |
|
| 54 | + |
|
| 55 | + // preprocess html, essential for UTF-8 |
|
| 56 | + $html = $this->wrapHTML($html, $config, $context); |
|
| 57 | + |
|
| 58 | + $doc = new DOMDocument(); |
|
| 59 | + $doc->encoding = 'UTF-8'; // theoretically, the above has this covered |
|
| 60 | + |
|
| 61 | + set_error_handler(array($this, 'muteErrorHandler')); |
|
| 62 | + $doc->loadHTML($html); |
|
| 63 | + restore_error_handler(); |
|
| 64 | + |
|
| 65 | + $tokens = array(); |
|
| 66 | + $this->tokenizeDOM( |
|
| 67 | + $doc->getElementsByTagName('html')->item(0)-> // <html> |
|
| 68 | + getElementsByTagName('body')->item(0)-> // <body> |
|
| 69 | + getElementsByTagName('div')->item(0) // <div> |
|
| 70 | + , $tokens); |
|
| 71 | + return $tokens; |
|
| 72 | + } |
|
| 73 | + |
|
| 74 | + /** |
|
| 75 | + * Iterative function that tokenizes a node, putting it into an accumulator. |
|
| 76 | + * To iterate is human, to recurse divine - L. Peter Deutsch |
|
| 77 | + * @param $node DOMNode to be tokenized. |
|
| 78 | + * @param $tokens Array-list of already tokenized tokens. |
|
| 79 | + * @returns Tokens of node appended to previously passed tokens. |
|
| 80 | + */ |
|
| 81 | + protected function tokenizeDOM($node, &$tokens) { |
|
| 82 | + |
|
| 83 | + $level = 0; |
|
| 84 | + $nodes = array($level => array($node)); |
|
| 85 | + $closingNodes = array(); |
|
| 86 | + do { |
|
| 87 | + while (!empty($nodes[$level])) { |
|
| 88 | + $node = array_shift($nodes[$level]); // FIFO |
|
| 89 | + $collect = $level > 0 ? true : false; |
|
| 90 | + $needEndingTag = $this->createStartNode($node, $tokens, $collect); |
|
| 91 | + if ($needEndingTag) { |
|
| 92 | + $closingNodes[$level][] = $node; |
|
| 93 | + } |
|
| 94 | + if ($node->childNodes && $node->childNodes->length) { |
|
| 95 | + $level++; |
|
| 96 | + $nodes[$level] = array(); |
|
| 97 | + foreach ($node->childNodes as $childNode) { |
|
| 98 | + array_push($nodes[$level], $childNode); |
|
| 99 | + } |
|
| 100 | + } |
|
| 101 | + } |
|
| 102 | + $level--; |
|
| 103 | + if ($level && isset($closingNodes[$level])) { |
|
| 104 | + while($node = array_pop($closingNodes[$level])) { |
|
| 105 | + $this->createEndNode($node, $tokens); |
|
| 106 | + } |
|
| 107 | + } |
|
| 108 | + } while ($level > 0); |
|
| 109 | + } |
|
| 110 | + |
|
| 111 | + /** |
|
| 112 | + * @param $node DOMNode to be tokenized. |
|
| 113 | + * @param $tokens Array-list of already tokenized tokens. |
|
| 114 | + * @param $collect Says whether or start and close are collected, set to |
|
| 115 | + * false at first recursion because it's the implicit DIV |
|
| 116 | + * tag you're dealing with. |
|
| 117 | + * @returns bool if the token needs an endtoken |
|
| 118 | + */ |
|
| 119 | + protected function createStartNode($node, &$tokens, $collect) { |
|
| 120 | + // intercept non element nodes. WE MUST catch all of them, |
|
| 121 | + // but we're not getting the character reference nodes because |
|
| 122 | + // those should have been preprocessed |
|
| 123 | + if ($node->nodeType === XML_TEXT_NODE) { |
|
| 124 | + $tokens[] = $this->factory->createText($node->data); |
|
| 125 | + return false; |
|
| 126 | + } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { |
|
| 127 | + // undo libxml's special treatment of <script> and <style> tags |
|
| 128 | + $last = end($tokens); |
|
| 129 | + $data = $node->data; |
|
| 130 | + // (note $node->tagname is already normalized) |
|
| 131 | + if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) { |
|
| 132 | + $new_data = trim($data); |
|
| 133 | + if (substr($new_data, 0, 4) === '<!--') { |
|
| 134 | + $data = substr($new_data, 4); |
|
| 135 | + if (substr($data, -3) === '-->') { |
|
| 136 | + $data = substr($data, 0, -3); |
|
| 137 | + } else { |
|
| 138 | + // Highly suspicious! Not sure what to do... |
|
| 139 | + } |
|
| 140 | + } |
|
| 141 | + } |
|
| 142 | + $tokens[] = $this->factory->createText($this->parseData($data)); |
|
| 143 | + return false; |
|
| 144 | + } elseif ($node->nodeType === XML_COMMENT_NODE) { |
|
| 145 | + // this code is only invoked for comments in script/style in versions |
|
| 146 | + // of libxml pre-2.6.28 (regular comments, of course, are still |
|
| 147 | + // handled regularly) |
|
| 148 | + $tokens[] = $this->factory->createComment($node->data); |
|
| 149 | + return false; |
|
| 150 | + } elseif ( |
|
| 151 | + // not-well tested: there may be other nodes we have to grab |
|
| 152 | + $node->nodeType !== XML_ELEMENT_NODE |
|
| 153 | + ) { |
|
| 154 | + return false; |
|
| 155 | + } |
|
| 156 | + |
|
| 157 | + $attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array(); |
|
| 158 | + |
|
| 159 | + // We still have to make sure that the element actually IS empty |
|
| 160 | + if (!$node->childNodes->length) { |
|
| 161 | + if ($collect) { |
|
| 162 | + $tokens[] = $this->factory->createEmpty($node->tagName, $attr); |
|
| 163 | + } |
|
| 164 | + return false; |
|
| 165 | + } else { |
|
| 166 | + if ($collect) { |
|
| 167 | + $tokens[] = $this->factory->createStart( |
|
| 168 | + $tag_name = $node->tagName, // somehow, it gets dropped |
|
| 169 | + $attr |
|
| 170 | + ); |
|
| 171 | + } |
|
| 172 | + return true; |
|
| 173 | + } |
|
| 174 | + } |
|
| 175 | + |
|
| 176 | + protected function createEndNode($node, &$tokens) { |
|
| 177 | + $tokens[] = $this->factory->createEnd($node->tagName); |
|
| 178 | + } |
|
| 179 | + |
|
| 180 | + |
|
| 181 | + /** |
|
| 182 | + * Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array. |
|
| 183 | + * |
|
| 184 | + * @param $node_map DOMNamedNodeMap of DOMAttr objects. |
|
| 185 | + * @returns Associative array of attributes. |
|
| 186 | + */ |
|
| 187 | + protected function transformAttrToAssoc($node_map) { |
|
| 188 | + // NamedNodeMap is documented very well, so we're using undocumented |
|
| 189 | + // features, namely, the fact that it implements Iterator and |
|
| 190 | + // has a ->length attribute |
|
| 191 | + if ($node_map->length === 0) return array(); |
|
| 192 | + $array = array(); |
|
| 193 | + foreach ($node_map as $attr) { |
|
| 194 | + $array[$attr->name] = $attr->value; |
|
| 195 | + } |
|
| 196 | + return $array; |
|
| 197 | + } |
|
| 198 | + |
|
| 199 | + /** |
|
| 200 | + * An error handler that mutes all errors |
|
| 201 | + */ |
|
| 202 | + public function muteErrorHandler($errno, $errstr) {} |
|
| 203 | + |
|
| 204 | + /** |
|
| 205 | + * Callback function for undoing escaping of stray angled brackets |
|
| 206 | + * in comments |
|
| 207 | + */ |
|
| 208 | + public function callbackUndoCommentSubst($matches) { |
|
| 209 | + return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2]; |
|
| 210 | + } |
|
| 211 | + |
|
| 212 | + /** |
|
| 213 | + * Callback function that entity-izes ampersands in comments so that |
|
| 214 | + * callbackUndoCommentSubst doesn't clobber them |
|
| 215 | + */ |
|
| 216 | + public function callbackArmorCommentEntities($matches) { |
|
| 217 | + return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2]; |
|
| 218 | + } |
|
| 219 | + |
|
| 220 | + /** |
|
| 221 | + * Wraps an HTML fragment in the necessary HTML |
|
| 222 | + */ |
|
| 223 | + protected function wrapHTML($html, $config, $context) { |
|
| 224 | + $def = $config->getDefinition('HTML'); |
|
| 225 | + $ret = ''; |
|
| 226 | + |
|
| 227 | + if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) { |
|
| 228 | + $ret .= '<!DOCTYPE html '; |
|
| 229 | + if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" '; |
|
| 230 | + if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" '; |
|
| 231 | + $ret .= '>'; |
|
| 232 | + } |
|
| 233 | + |
|
| 234 | + $ret .= '<html><head>'; |
|
| 235 | + $ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'; |
|
| 236 | + // No protection if $html contains a stray </div>! |
|
| 237 | + $ret .= '</head><body><div>'.$html.'</div></body></html>'; |
|
| 238 | + return $ret; |
|
| 239 | + } |
|
| 240 | 240 | |
| 241 | 241 | } |
| 242 | 242 | |
@@ -188,7 +188,9 @@ discard block |
||
| 188 | 188 | // NamedNodeMap is documented very well, so we're using undocumented |
| 189 | 189 | // features, namely, the fact that it implements Iterator and |
| 190 | 190 | // has a ->length attribute |
| 191 | - if ($node_map->length === 0) return array(); |
|
| 191 | + if ($node_map->length === 0) { |
|
| 192 | + return array(); |
|
| 193 | + } |
|
| 192 | 194 | $array = array(); |
| 193 | 195 | foreach ($node_map as $attr) { |
| 194 | 196 | $array[$attr->name] = $attr->value; |
@@ -226,8 +228,12 @@ discard block |
||
| 226 | 228 | |
| 227 | 229 | if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) { |
| 228 | 230 | $ret .= '<!DOCTYPE html '; |
| 229 | - if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" '; |
|
| 230 | - if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" '; |
|
| 231 | + if (!empty($def->doctype->dtdPublic)) { |
|
| 232 | + $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" '; |
|
| 233 | + } |
|
| 234 | + if (!empty($def->doctype->dtdSystem)) { |
|
| 235 | + $ret .= '"' . $def->doctype->dtdSystem . '" '; |
|
| 236 | + } |
|
| 231 | 237 | $ret .= '>'; |
| 232 | 238 | } |
| 233 | 239 | |
@@ -101,7 +101,7 @@ discard block |
||
| 101 | 101 | } |
| 102 | 102 | $level--; |
| 103 | 103 | if ($level && isset($closingNodes[$level])) { |
| 104 | - while($node = array_pop($closingNodes[$level])) { |
|
| 104 | + while ($node = array_pop($closingNodes[$level])) { |
|
| 105 | 105 | $this->createEndNode($node, $tokens); |
| 106 | 106 | } |
| 107 | 107 | } |
@@ -206,7 +206,7 @@ discard block |
||
| 206 | 206 | * in comments |
| 207 | 207 | */ |
| 208 | 208 | public function callbackUndoCommentSubst($matches) { |
| 209 | - return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2]; |

| 209 | + return '<!--'.strtr($matches[1], array('&amp;'=>'&', '&lt;'=>'<')).$matches[2]; |
|
| 210 | 210 | } |
| 211 | 211 | |
| 212 | 212 | /** |
@@ -214,7 +214,7 @@ discard block |
||
| 214 | 214 | * callbackUndoCommentSubst doesn't clobber them |
| 215 | 215 | */ |
| 216 | 216 | public function callbackArmorCommentEntities($matches) { |
| 217 | - return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2]; |

| 217 | + return '<!--'.str_replace('&', '&amp;', $matches[1]).$matches[2]; |
|
| 218 | 218 | } |
| 219 | 219 | |
| 220 | 220 | /** |
@@ -226,8 +226,8 @@ discard block |
||
| 226 | 226 | |
| 227 | 227 | if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) { |
| 228 | 228 | $ret .= '<!DOCTYPE html '; |
| 229 | - if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" '; |
|
| 230 | - if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" '; |
|
| 229 | + if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "'.$def->doctype->dtdPublic.'" '; |
|
| 230 | + if (!empty($def->doctype->dtdSystem)) $ret .= '"'.$def->doctype->dtdSystem.'" '; |
|
| 231 | 231 | $ret .= '>'; |
| 232 | 232 | } |
| 233 | 233 | |
@@ -124,6 +124,9 @@ discard block |
||
| 124 | 124 | const CHARACTR = 4; |
| 125 | 125 | const EOF = 5; |
| 126 | 126 | |
| 127 | + /** |
|
| 128 | + * @param string $data |
|
| 129 | + */ |
|
| 127 | 130 | public function __construct($data) { |
| 128 | 131 | |
| 129 | 132 | $this->data = $data; |
@@ -143,12 +146,20 @@ discard block |
||
| 143 | 146 | return $this->tree->save(); |
| 144 | 147 | } |
| 145 | 148 | |
| 149 | + /** |
|
| 150 | + * @return string |
|
| 151 | + */ |
|
| 146 | 152 | private function char() { |
| 147 | 153 | return ($this->char < $this->EOF) |
| 148 | 154 | ? $this->data[$this->char] |
| 149 | 155 | : false; |
| 150 | 156 | } |
| 151 | 157 | |
| 158 | + /** |
|
| 159 | + * @param integer $s |
|
| 160 | + * |
|
| 161 | + * @return string |
|
| 162 | + */ |
|
| 152 | 163 | private function character($s, $l = 0) { |
| 153 | 164 | if($s + $l < $this->EOF) { |
| 154 | 165 | if($l === 0) { |
@@ -159,6 +170,10 @@ discard block |
||
| 159 | 170 | } |
| 160 | 171 | } |
| 161 | 172 | |
| 173 | + /** |
|
| 174 | + * @param string $char_class |
|
| 175 | + * @param integer $start |
|
| 176 | + */ |
|
| 162 | 177 | private function characters($char_class, $start) { |
| 163 | 178 | return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); |
| 164 | 179 | } |
@@ -3767,6 +3782,9 @@ discard block |
||
| 3767 | 3782 | return self::PHRASING; |
| 3768 | 3783 | } |
| 3769 | 3784 | |
| 3785 | + /** |
|
| 3786 | + * @param string[] $elements |
|
| 3787 | + */ |
|
| 3770 | 3788 | private function clearStackToTableContext($elements) { |
| 3771 | 3789 | /* When the steps above require the UA to clear the stack back to a |
| 3772 | 3790 | table context, it means that the UA must, while the current node is not |
@@ -12,26 +12,26 @@ discard block |
||
| 12 | 12 | |
| 13 | 13 | class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex { |
| 14 | 14 | |
| 15 | - public function tokenizeHTML($html, $config, $context) { |
|
| 16 | - $new_html = $this->normalize($html, $config, $context); |
|
| 17 | - $new_html = $this->wrapHTML($new_html, $config, $context); |
|
| 18 | - try { |
|
| 19 | - $parser = new HTML5($new_html); |
|
| 20 | - $doc = $parser->save(); |
|
| 21 | - } catch (DOMException $e) { |
|
| 22 | - // Uh oh, it failed. Punt to DirectLex. |
|
| 23 | - $lexer = new HTMLPurifier_Lexer_DirectLex(); |
|
| 24 | - $context->register('PH5PError', $e); // save the error, so we can detect it |
|
| 25 | - return $lexer->tokenizeHTML($html, $config, $context); // use original HTML |
|
| 26 | - } |
|
| 27 | - $tokens = array(); |
|
| 28 | - $this->tokenizeDOM( |
|
| 29 | - $doc->getElementsByTagName('html')->item(0)-> // <html> |
|
| 30 | - getElementsByTagName('body')->item(0)-> // <body> |
|
| 31 | - getElementsByTagName('div')->item(0) // <div> |
|
| 32 | - , $tokens); |
|
| 33 | - return $tokens; |
|
| 34 | - } |
|
| 15 | + public function tokenizeHTML($html, $config, $context) { |
|
| 16 | + $new_html = $this->normalize($html, $config, $context); |
|
| 17 | + $new_html = $this->wrapHTML($new_html, $config, $context); |
|
| 18 | + try { |
|
| 19 | + $parser = new HTML5($new_html); |
|
| 20 | + $doc = $parser->save(); |
|
| 21 | + } catch (DOMException $e) { |
|
| 22 | + // Uh oh, it failed. Punt to DirectLex. |
|
| 23 | + $lexer = new HTMLPurifier_Lexer_DirectLex(); |
|
| 24 | + $context->register('PH5PError', $e); // save the error, so we can detect it |
|
| 25 | + return $lexer->tokenizeHTML($html, $config, $context); // use original HTML |
|
| 26 | + } |
|
| 27 | + $tokens = array(); |
|
| 28 | + $this->tokenizeDOM( |
|
| 29 | + $doc->getElementsByTagName('html')->item(0)-> // <html> |
|
| 30 | + getElementsByTagName('body')->item(0)-> // <body> |
|
| 31 | + getElementsByTagName('div')->item(0) // <div> |
|
| 32 | + , $tokens); |
|
| 33 | + return $tokens; |
|
| 34 | + } |
|
| 35 | 35 | |
| 36 | 36 | } |
| 37 | 37 | |
@@ -61,145 +61,145 @@ discard block |
||
| 61 | 61 | */ |
| 62 | 62 | |
| 63 | 63 | class HTML5 { |
| 64 | - private $data; |
|
| 65 | - private $char; |
|
| 66 | - private $EOF; |
|
| 67 | - private $state; |
|
| 68 | - private $tree; |
|
| 69 | - private $token; |
|
| 70 | - private $content_model; |
|
| 71 | - private $escape = false; |
|
| 72 | - private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', |
|
| 73 | - 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', |
|
| 74 | - 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', |
|
| 75 | - 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', |
|
| 76 | - 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', |
|
| 77 | - 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', |
|
| 78 | - 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', |
|
| 79 | - 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', |
|
| 80 | - 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', |
|
| 81 | - 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', |
|
| 82 | - 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', |
|
| 83 | - 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', |
|
| 84 | - 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', |
|
| 85 | - 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', |
|
| 86 | - 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', |
|
| 87 | - 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', |
|
| 88 | - 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', |
|
| 89 | - 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', |
|
| 90 | - 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', |
|
| 91 | - 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', |
|
| 92 | - 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', |
|
| 93 | - 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', |
|
| 94 | - 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', |
|
| 95 | - 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', |
|
| 96 | - 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', |
|
| 97 | - 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', |
|
| 98 | - 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', |
|
| 99 | - 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', |
|
| 100 | - 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', |
|
| 101 | - 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', |
|
| 102 | - 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', |
|
| 103 | - 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', |
|
| 104 | - 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', |
|
| 105 | - 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', |
|
| 106 | - 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', |
|
| 107 | - 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', |
|
| 108 | - 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', |
|
| 109 | - 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', |
|
| 110 | - 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', |
|
| 111 | - 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', |
|
| 112 | - 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', |
|
| 113 | - 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); |
|
| 114 | - |
|
| 115 | - const PCDATA = 0; |
|
| 116 | - const RCDATA = 1; |
|
| 117 | - const CDATA = 2; |
|
| 118 | - const PLAINTEXT = 3; |
|
| 119 | - |
|
| 120 | - const DOCTYPE = 0; |
|
| 121 | - const STARTTAG = 1; |
|
| 122 | - const ENDTAG = 2; |
|
| 123 | - const COMMENT = 3; |
|
| 124 | - const CHARACTR = 4; |
|
| 125 | - const EOF = 5; |
|
| 126 | - |
|
| 127 | - public function __construct($data) { |
|
| 128 | - |
|
| 129 | - $this->data = $data; |
|
| 130 | - $this->char = -1; |
|
| 131 | - $this->EOF = strlen($data); |
|
| 132 | - $this->tree = new HTML5TreeConstructer; |
|
| 133 | - $this->content_model = self::PCDATA; |
|
| 134 | - |
|
| 135 | - $this->state = 'data'; |
|
| 136 | - |
|
| 137 | - while($this->state !== null) { |
|
| 138 | - $this->{$this->state.'State'}(); |
|
| 139 | - } |
|
| 140 | - } |
|
| 141 | - |
|
| 142 | - public function save() { |
|
| 143 | - return $this->tree->save(); |
|
| 144 | - } |
|
| 145 | - |
|
| 146 | - private function char() { |
|
| 147 | - return ($this->char < $this->EOF) |
|
| 148 | - ? $this->data[$this->char] |
|
| 149 | - : false; |
|
| 150 | - } |
|
| 151 | - |
|
| 152 | - private function character($s, $l = 0) { |
|
| 153 | - if($s + $l < $this->EOF) { |
|
| 154 | - if($l === 0) { |
|
| 155 | - return $this->data[$s]; |
|
| 156 | - } else { |
|
| 157 | - return substr($this->data, $s, $l); |
|
| 158 | - } |
|
| 159 | - } |
|
| 160 | - } |
|
| 161 | - |
|
| 162 | - private function characters($char_class, $start) { |
|
| 163 | - return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); |
|
| 164 | - } |
|
| 165 | - |
|
| 166 | - private function dataState() { |
|
| 167 | - // Consume the next input character |
|
| 168 | - $this->char++; |
|
| 169 | - $char = $this->char(); |
|
| 170 | - |
|
| 171 | - if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { |
|
| 172 | - /* U+0026 AMPERSAND (&) |
|
| 64 | + private $data; |
|
| 65 | + private $char; |
|
| 66 | + private $EOF; |
|
| 67 | + private $state; |
|
| 68 | + private $tree; |
|
| 69 | + private $token; |
|
| 70 | + private $content_model; |
|
| 71 | + private $escape = false; |
|
| 72 | + private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', |
|
| 73 | + 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', |
|
| 74 | + 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', |
|
| 75 | + 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', |
|
| 76 | + 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', |
|
| 77 | + 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', |
|
| 78 | + 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', |
|
| 79 | + 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', |
|
| 80 | + 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', |
|
| 81 | + 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', |
|
| 82 | + 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', |
|
| 83 | + 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', |
|
| 84 | + 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', |
|
| 85 | + 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', |
|
| 86 | + 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', |
|
| 87 | + 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', |
|
| 88 | + 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', |
|
| 89 | + 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', |
|
| 90 | + 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', |
|
| 91 | + 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', |
|
| 92 | + 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', |
|
| 93 | + 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', |
|
| 94 | + 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', |
|
| 95 | + 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', |
|
| 96 | + 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', |
|
| 97 | + 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', |
|
| 98 | + 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', |
|
| 99 | + 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', |
|
| 100 | + 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', |
|
| 101 | + 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', |
|
| 102 | + 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', |
|
| 103 | + 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', |
|
| 104 | + 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', |
|
| 105 | + 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', |
|
| 106 | + 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', |
|
| 107 | + 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', |
|
| 108 | + 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', |
|
| 109 | + 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', |
|
| 110 | + 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', |
|
| 111 | + 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', |
|
| 112 | + 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', |
|
| 113 | + 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); |
|
| 114 | + |
|
| 115 | + const PCDATA = 0; |
|
| 116 | + const RCDATA = 1; |
|
| 117 | + const CDATA = 2; |
|
| 118 | + const PLAINTEXT = 3; |
|
| 119 | + |
|
| 120 | + const DOCTYPE = 0; |
|
| 121 | + const STARTTAG = 1; |
|
| 122 | + const ENDTAG = 2; |
|
| 123 | + const COMMENT = 3; |
|
| 124 | + const CHARACTR = 4; |
|
| 125 | + const EOF = 5; |
|
| 126 | + |
|
| 127 | + public function __construct($data) { |
|
| 128 | + |
|
| 129 | + $this->data = $data; |
|
| 130 | + $this->char = -1; |
|
| 131 | + $this->EOF = strlen($data); |
|
| 132 | + $this->tree = new HTML5TreeConstructer; |
|
| 133 | + $this->content_model = self::PCDATA; |
|
| 134 | + |
|
| 135 | + $this->state = 'data'; |
|
| 136 | + |
|
| 137 | + while($this->state !== null) { |
|
| 138 | + $this->{$this->state.'State'}(); |
|
| 139 | + } |
|
| 140 | + } |
|
| 141 | + |
|
| 142 | + public function save() { |
|
| 143 | + return $this->tree->save(); |
|
| 144 | + } |
|
| 145 | + |
|
| 146 | + private function char() { |
|
| 147 | + return ($this->char < $this->EOF) |
|
| 148 | + ? $this->data[$this->char] |
|
| 149 | + : false; |
|
| 150 | + } |
|
| 151 | + |
|
| 152 | + private function character($s, $l = 0) { |
|
| 153 | + if($s + $l < $this->EOF) { |
|
| 154 | + if($l === 0) { |
|
| 155 | + return $this->data[$s]; |
|
| 156 | + } else { |
|
| 157 | + return substr($this->data, $s, $l); |
|
| 158 | + } |
|
| 159 | + } |
|
| 160 | + } |
|
| 161 | + |
|
| 162 | + private function characters($char_class, $start) { |
|
| 163 | + return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); |
|
| 164 | + } |
|
| 165 | + |
|
| 166 | + private function dataState() { |
|
| 167 | + // Consume the next input character |
|
| 168 | + $this->char++; |
|
| 169 | + $char = $this->char(); |
|
| 170 | + |
|
| 171 | + if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { |
|
| 172 | + /* U+0026 AMPERSAND (&) |
|
| 173 | 173 | When the content model flag is set to one of the PCDATA or RCDATA |
| 174 | 174 | states: switch to the entity data state. Otherwise: treat it as per |
| 175 | 175 | the "anything else" entry below. */ |
| 176 | - $this->state = 'entityData'; |
|
| 176 | + $this->state = 'entityData'; |
|
| 177 | 177 | |
| 178 | - } elseif($char === '-') { |
|
| 179 | - /* If the content model flag is set to either the RCDATA state or |
|
| 178 | + } elseif($char === '-') { |
|
| 179 | + /* If the content model flag is set to either the RCDATA state or |
|
| 180 | 180 | the CDATA state, and the escape flag is false, and there are at |
| 181 | 181 | least three characters before this one in the input stream, and the |
| 182 | 182 | last four characters in the input stream, including this one, are |
| 183 | 183 | U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, |
| 184 | 184 | and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */ |
| 185 | - if(($this->content_model === self::RCDATA || $this->content_model === |
|
| 186 | - self::CDATA) && $this->escape === false && |
|
| 187 | - $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { |
|
| 188 | - $this->escape = true; |
|
| 189 | - } |
|
| 185 | + if(($this->content_model === self::RCDATA || $this->content_model === |
|
| 186 | + self::CDATA) && $this->escape === false && |
|
| 187 | + $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { |
|
| 188 | + $this->escape = true; |
|
| 189 | + } |
|
| 190 | 190 | |
| 191 | - /* In any case, emit the input character as a character token. Stay |
|
| 191 | + /* In any case, emit the input character as a character token. Stay |
|
| 192 | 192 | in the data state. */ |
| 193 | - $this->emitToken(array( |
|
| 194 | - 'type' => self::CHARACTR, |
|
| 195 | - 'data' => $char |
|
| 196 | - )); |
|
| 197 | - |
|
| 198 | - /* U+003C LESS-THAN SIGN (<) */ |
|
| 199 | - } elseif($char === '<' && ($this->content_model === self::PCDATA || |
|
| 200 | - (($this->content_model === self::RCDATA || |
|
| 201 | - $this->content_model === self::CDATA) && $this->escape === false))) { |
|
| 202 | - /* When the content model flag is set to the PCDATA state: switch |
|
| 193 | + $this->emitToken(array( |
|
| 194 | + 'type' => self::CHARACTR, |
|
| 195 | + 'data' => $char |
|
| 196 | + )); |
|
| 197 | + |
|
| 198 | + /* U+003C LESS-THAN SIGN (<) */ |
|
| 199 | + } elseif($char === '<' && ($this->content_model === self::PCDATA || |
|
| 200 | + (($this->content_model === self::RCDATA || |
|
| 201 | + $this->content_model === self::CDATA) && $this->escape === false))) { |
|
| 202 | + /* When the content model flag is set to the PCDATA state: switch |
|
| 203 | 203 | to the tag open state. |
| 204 | 204 | |
| 205 | 205 | When the content model flag is set to either the RCDATA state or |
@@ -207,171 +207,171 @@ discard block |
||
| 207 | 207 | open state. |
| 208 | 208 | |
| 209 | 209 | Otherwise: treat it as per the "anything else" entry below. */ |
| 210 | - $this->state = 'tagOpen'; |
|
| 210 | + $this->state = 'tagOpen'; |
|
| 211 | 211 | |
| 212 | - /* U+003E GREATER-THAN SIGN (>) */ |
|
| 213 | - } elseif($char === '>') { |
|
| 214 | - /* If the content model flag is set to either the RCDATA state or |
|
| 212 | + /* U+003E GREATER-THAN SIGN (>) */ |
|
| 213 | + } elseif($char === '>') { |
|
| 214 | + /* If the content model flag is set to either the RCDATA state or |
|
| 215 | 215 | the CDATA state, and the escape flag is true, and the last three |
| 216 | 216 | characters in the input stream including this one are U+002D |
| 217 | 217 | HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), |
| 218 | 218 | set the escape flag to false. */ |
| 219 | - if(($this->content_model === self::RCDATA || |
|
| 220 | - $this->content_model === self::CDATA) && $this->escape === true && |
|
| 221 | - $this->character($this->char, 3) === '-->') { |
|
| 222 | - $this->escape = false; |
|
| 223 | - } |
|
| 219 | + if(($this->content_model === self::RCDATA || |
|
| 220 | + $this->content_model === self::CDATA) && $this->escape === true && |
|
| 221 | + $this->character($this->char, 3) === '-->') { |
|
| 222 | + $this->escape = false; |
|
| 223 | + } |
|
| 224 | 224 | |
| 225 | - /* In any case, emit the input character as a character token. |
|
| 225 | + /* In any case, emit the input character as a character token. |
|
| 226 | 226 | Stay in the data state. */ |
| 227 | - $this->emitToken(array( |
|
| 228 | - 'type' => self::CHARACTR, |
|
| 229 | - 'data' => $char |
|
| 230 | - )); |
|
| 227 | + $this->emitToken(array( |
|
| 228 | + 'type' => self::CHARACTR, |
|
| 229 | + 'data' => $char |
|
| 230 | + )); |
|
| 231 | 231 | |
| 232 | - } elseif($this->char === $this->EOF) { |
|
| 233 | - /* EOF |
|
| 232 | + } elseif($this->char === $this->EOF) { |
|
| 233 | + /* EOF |
|
| 234 | 234 | Emit an end-of-file token. */ |
| 235 | - $this->EOF(); |
|
| 235 | + $this->EOF(); |
|
| 236 | 236 | |
| 237 | - } elseif($this->content_model === self::PLAINTEXT) { |
|
| 238 | - /* When the content model flag is set to the PLAINTEXT state |
|
| 237 | + } elseif($this->content_model === self::PLAINTEXT) { |
|
| 238 | + /* When the content model flag is set to the PLAINTEXT state |
|
| 239 | 239 | THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of |
| 240 | 240 | the text and emit it as a character token. */ |
| 241 | - $this->emitToken(array( |
|
| 242 | - 'type' => self::CHARACTR, |
|
| 243 | - 'data' => substr($this->data, $this->char) |
|
| 244 | - )); |
|
| 241 | + $this->emitToken(array( |
|
| 242 | + 'type' => self::CHARACTR, |
|
| 243 | + 'data' => substr($this->data, $this->char) |
|
| 244 | + )); |
|
| 245 | 245 | |
| 246 | - $this->EOF(); |
|
| 246 | + $this->EOF(); |
|
| 247 | 247 | |
| 248 | - } else { |
|
| 249 | - /* Anything else |
|
| 248 | + } else { |
|
| 249 | + /* Anything else |
|
| 250 | 250 | THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters that |
| 251 | 251 | otherwise would also be treated as a character token and emit it |
| 252 | 252 | as a single character token. Stay in the data state. */ |
| 253 | - $len = strcspn($this->data, '<&', $this->char); |
|
| 254 | - $char = substr($this->data, $this->char, $len); |
|
| 255 | - $this->char += $len - 1; |
|
| 256 | - |
|
| 257 | - $this->emitToken(array( |
|
| 258 | - 'type' => self::CHARACTR, |
|
| 259 | - 'data' => $char |
|
| 260 | - )); |
|
| 261 | - |
|
| 262 | - $this->state = 'data'; |
|
| 263 | - } |
|
| 264 | - } |
|
| 265 | - |
|
| 266 | - private function entityDataState() { |
|
| 267 | - // Attempt to consume an entity. |
|
| 268 | - $entity = $this->entity(); |
|
| 269 | - |
|
| 270 | - // If nothing is returned, emit a U+0026 AMPERSAND character token. |
|
| 271 | - // Otherwise, emit the character token that was returned. |
|
| 272 | - $char = (!$entity) ? '&' : $entity; |
|
| 273 | - $this->emitToken(array( |
|
| 274 | - 'type' => self::CHARACTR, |
|
| 275 | - 'data' => $char |
|
| 276 | - )); |
|
| 277 | - |
|
| 278 | - // Finally, switch to the data state. |
|
| 279 | - $this->state = 'data'; |
|
| 280 | - } |
|
| 281 | - |
|
| 282 | - private function tagOpenState() { |
|
| 283 | - switch($this->content_model) { |
|
| 284 | - case self::RCDATA: |
|
| 285 | - case self::CDATA: |
|
| 286 | - /* If the next input character is a U+002F SOLIDUS (/) character, |
|
| 253 | + $len = strcspn($this->data, '<&', $this->char); |
|
| 254 | + $char = substr($this->data, $this->char, $len); |
|
| 255 | + $this->char += $len - 1; |
|
| 256 | + |
|
| 257 | + $this->emitToken(array( |
|
| 258 | + 'type' => self::CHARACTR, |
|
| 259 | + 'data' => $char |
|
| 260 | + )); |
|
| 261 | + |
|
| 262 | + $this->state = 'data'; |
|
| 263 | + } |
|
| 264 | + } |
|
| 265 | + |
|
| 266 | + private function entityDataState() { |
|
| 267 | + // Attempt to consume an entity. |
|
| 268 | + $entity = $this->entity(); |
|
| 269 | + |
|
| 270 | + // If nothing is returned, emit a U+0026 AMPERSAND character token. |
|
| 271 | + // Otherwise, emit the character token that was returned. |
|
| 272 | + $char = (!$entity) ? '&' : $entity; |
|
| 273 | + $this->emitToken(array( |
|
| 274 | + 'type' => self::CHARACTR, |
|
| 275 | + 'data' => $char |
|
| 276 | + )); |
|
| 277 | + |
|
| 278 | + // Finally, switch to the data state. |
|
| 279 | + $this->state = 'data'; |
|
| 280 | + } |
|
| 281 | + |
|
| 282 | + private function tagOpenState() { |
|
| 283 | + switch($this->content_model) { |
|
| 284 | + case self::RCDATA: |
|
| 285 | + case self::CDATA: |
|
| 286 | + /* If the next input character is a U+002F SOLIDUS (/) character, |
|
| 287 | 287 | consume it and switch to the close tag open state. If the next |
| 288 | 288 | input character is not a U+002F SOLIDUS (/) character, emit a |
| 289 | 289 | U+003C LESS-THAN SIGN character token and switch to the data |
| 290 | 290 | state to process the next input character. */ |
| 291 | - if($this->character($this->char + 1) === '/') { |
|
| 292 | - $this->char++; |
|
| 293 | - $this->state = 'closeTagOpen'; |
|
| 294 | - |
|
| 295 | - } else { |
|
| 296 | - $this->emitToken(array( |
|
| 297 | - 'type' => self::CHARACTR, |
|
| 298 | - 'data' => '<' |
|
| 299 | - )); |
|
| 300 | - |
|
| 301 | - $this->state = 'data'; |
|
| 302 | - } |
|
| 303 | - break; |
|
| 304 | - |
|
| 305 | - case self::PCDATA: |
|
| 306 | - // If the content model flag is set to the PCDATA state |
|
| 307 | - // Consume the next input character: |
|
| 308 | - $this->char++; |
|
| 309 | - $char = $this->char(); |
|
| 310 | - |
|
| 311 | - if($char === '!') { |
|
| 312 | - /* U+0021 EXCLAMATION MARK (!) |
|
| 291 | + if($this->character($this->char + 1) === '/') { |
|
| 292 | + $this->char++; |
|
| 293 | + $this->state = 'closeTagOpen'; |
|
| 294 | + |
|
| 295 | + } else { |
|
| 296 | + $this->emitToken(array( |
|
| 297 | + 'type' => self::CHARACTR, |
|
| 298 | + 'data' => '<' |
|
| 299 | + )); |
|
| 300 | + |
|
| 301 | + $this->state = 'data'; |
|
| 302 | + } |
|
| 303 | + break; |
|
| 304 | + |
|
| 305 | + case self::PCDATA: |
|
| 306 | + // If the content model flag is set to the PCDATA state |
|
| 307 | + // Consume the next input character: |
|
| 308 | + $this->char++; |
|
| 309 | + $char = $this->char(); |
|
| 310 | + |
|
| 311 | + if($char === '!') { |
|
| 312 | + /* U+0021 EXCLAMATION MARK (!) |
|
| 313 | 313 | Switch to the markup declaration open state. */ |
| 314 | - $this->state = 'markupDeclarationOpen'; |
|
| 314 | + $this->state = 'markupDeclarationOpen'; |
|
| 315 | 315 | |
| 316 | - } elseif($char === '/') { |
|
| 317 | - /* U+002F SOLIDUS (/) |
|
| 316 | + } elseif($char === '/') { |
|
| 317 | + /* U+002F SOLIDUS (/) |
|
| 318 | 318 | Switch to the close tag open state. */ |
| 319 | - $this->state = 'closeTagOpen'; |
|
| 319 | + $this->state = 'closeTagOpen'; |
|
| 320 | 320 | |
| 321 | - } elseif(preg_match('/^[A-Za-z]$/', $char)) { |
|
| 322 | - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z |
|
| 321 | + } elseif(preg_match('/^[A-Za-z]$/', $char)) { |
|
| 322 | + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z |
|
| 323 | 323 | Create a new start tag token, set its tag name to the lowercase |
| 324 | 324 | version of the input character (add 0x0020 to the character's code |
| 325 | 325 | point), then switch to the tag name state. (Don't emit the token |
| 326 | 326 | yet; further details will be filled in before it is emitted.) */ |
| 327 | - $this->token = array( |
|
| 328 | - 'name' => strtolower($char), |
|
| 329 | - 'type' => self::STARTTAG, |
|
| 330 | - 'attr' => array() |
|
| 331 | - ); |
|
| 327 | + $this->token = array( |
|
| 328 | + 'name' => strtolower($char), |
|
| 329 | + 'type' => self::STARTTAG, |
|
| 330 | + 'attr' => array() |
|
| 331 | + ); |
|
| 332 | 332 | |
| 333 | - $this->state = 'tagName'; |
|
| 333 | + $this->state = 'tagName'; |
|
| 334 | 334 | |
| 335 | - } elseif($char === '>') { |
|
| 336 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 335 | + } elseif($char === '>') { |
|
| 336 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 337 | 337 | Parse error. Emit a U+003C LESS-THAN SIGN character token and a |
| 338 | 338 | U+003E GREATER-THAN SIGN character token. Switch to the data state. */ |
| 339 | - $this->emitToken(array( |
|
| 340 | - 'type' => self::CHARACTR, |
|
| 341 | - 'data' => '<>' |
|
| 342 | - )); |
|
| 339 | + $this->emitToken(array( |
|
| 340 | + 'type' => self::CHARACTR, |
|
| 341 | + 'data' => '<>' |
|
| 342 | + )); |
|
| 343 | 343 | |
| 344 | - $this->state = 'data'; |
|
| 344 | + $this->state = 'data'; |
|
| 345 | 345 | |
| 346 | - } elseif($char === '?') { |
|
| 347 | - /* U+003F QUESTION MARK (?) |
|
| 346 | + } elseif($char === '?') { |
|
| 347 | + /* U+003F QUESTION MARK (?) |
|
| 348 | 348 | Parse error. Switch to the bogus comment state. */ |
| 349 | - $this->state = 'bogusComment'; |
|
| 349 | + $this->state = 'bogusComment'; |
|
| 350 | 350 | |
| 351 | - } else { |
|
| 352 | - /* Anything else |
|
| 351 | + } else { |
|
| 352 | + /* Anything else |
|
| 353 | 353 | Parse error. Emit a U+003C LESS-THAN SIGN character token and |
| 354 | 354 | reconsume the current input character in the data state. */ |
| 355 | - $this->emitToken(array( |
|
| 356 | - 'type' => self::CHARACTR, |
|
| 357 | - 'data' => '<' |
|
| 358 | - )); |
|
| 359 | - |
|
| 360 | - $this->char--; |
|
| 361 | - $this->state = 'data'; |
|
| 362 | - } |
|
| 363 | - break; |
|
| 364 | - } |
|
| 365 | - } |
|
| 366 | - |
|
| 367 | - private function closeTagOpenState() { |
|
| 368 | - $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); |
|
| 369 | - $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; |
|
| 370 | - |
|
| 371 | - if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && |
|
| 372 | - (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', |
|
| 373 | - $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { |
|
| 374 | - /* If the content model flag is set to the RCDATA or CDATA states then |
|
| 355 | + $this->emitToken(array( |
|
| 356 | + 'type' => self::CHARACTR, |
|
| 357 | + 'data' => '<' |
|
| 358 | + )); |
|
| 359 | + |
|
| 360 | + $this->char--; |
|
| 361 | + $this->state = 'data'; |
|
| 362 | + } |
|
| 363 | + break; |
|
| 364 | + } |
|
| 365 | + } |
|
| 366 | + |
|
| 367 | + private function closeTagOpenState() { |
|
| 368 | + $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); |
|
| 369 | + $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; |
|
| 370 | + |
|
| 371 | + if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && |
|
| 372 | + (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', |
|
| 373 | + $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { |
|
| 374 | + /* If the content model flag is set to the RCDATA or CDATA states then |
|
| 375 | 375 | examine the next few characters. If they do not match the tag name of |
| 376 | 376 | the last start tag token emitted (case insensitively), or if they do but |
| 377 | 377 | they are not immediately followed by one of the following characters: |
@@ -386,426 +386,426 @@ discard block |
||
| 386 | 386 | ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character |
| 387 | 387 | token, a U+002F SOLIDUS character token, and switch to the data state |
| 388 | 388 | to process the next input character. */ |
| 389 | - $this->emitToken(array( |
|
| 390 | - 'type' => self::CHARACTR, |
|
| 391 | - 'data' => '</' |
|
| 392 | - )); |
|
| 389 | + $this->emitToken(array( |
|
| 390 | + 'type' => self::CHARACTR, |
|
| 391 | + 'data' => '</' |
|
| 392 | + )); |
|
| 393 | 393 | |
| 394 | - $this->state = 'data'; |
|
| 394 | + $this->state = 'data'; |
|
| 395 | 395 | |
| 396 | - } else { |
|
| 397 | - /* Otherwise, if the content model flag is set to the PCDATA state, |
|
| 396 | + } else { |
|
| 397 | + /* Otherwise, if the content model flag is set to the PCDATA state, |
|
| 398 | 398 | or if the next few characters do match that tag name, consume the |
| 399 | 399 | next input character: */ |
| 400 | - $this->char++; |
|
| 401 | - $char = $this->char(); |
|
| 400 | + $this->char++; |
|
| 401 | + $char = $this->char(); |
|
| 402 | 402 | |
| 403 | - if(preg_match('/^[A-Za-z]$/', $char)) { |
|
| 404 | - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z |
|
| 403 | + if(preg_match('/^[A-Za-z]$/', $char)) { |
|
| 404 | + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z |
|
| 405 | 405 | Create a new end tag token, set its tag name to the lowercase version |
| 406 | 406 | of the input character (add 0x0020 to the character's code point), then |
| 407 | 407 | switch to the tag name state. (Don't emit the token yet; further details |
| 408 | 408 | will be filled in before it is emitted.) */ |
| 409 | - $this->token = array( |
|
| 410 | - 'name' => strtolower($char), |
|
| 411 | - 'type' => self::ENDTAG |
|
| 412 | - ); |
|
| 409 | + $this->token = array( |
|
| 410 | + 'name' => strtolower($char), |
|
| 411 | + 'type' => self::ENDTAG |
|
| 412 | + ); |
|
| 413 | 413 | |
| 414 | - $this->state = 'tagName'; |
|
| 414 | + $this->state = 'tagName'; |
|
| 415 | 415 | |
| 416 | - } elseif($char === '>') { |
|
| 417 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 416 | + } elseif($char === '>') { |
|
| 417 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 418 | 418 | Parse error. Switch to the data state. */ |
| 419 | - $this->state = 'data'; |
|
| 419 | + $this->state = 'data'; |
|
| 420 | 420 | |
| 421 | - } elseif($this->char === $this->EOF) { |
|
| 422 | - /* EOF |
|
| 421 | + } elseif($this->char === $this->EOF) { |
|
| 422 | + /* EOF |
|
| 423 | 423 | Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F |
| 424 | 424 | SOLIDUS character token. Reconsume the EOF character in the data state. */ |
| 425 | - $this->emitToken(array( |
|
| 426 | - 'type' => self::CHARACTR, |
|
| 427 | - 'data' => '</' |
|
| 428 | - )); |
|
| 429 | - |
|
| 430 | - $this->char--; |
|
| 431 | - $this->state = 'data'; |
|
| 432 | - |
|
| 433 | - } else { |
|
| 434 | - /* Parse error. Switch to the bogus comment state. */ |
|
| 435 | - $this->state = 'bogusComment'; |
|
| 436 | - } |
|
| 437 | - } |
|
| 438 | - } |
|
| 439 | - |
|
| 440 | - private function tagNameState() { |
|
| 441 | - // Consume the next input character: |
|
| 442 | - $this->char++; |
|
| 443 | - $char = $this->character($this->char); |
|
| 444 | - |
|
| 445 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 446 | - /* U+0009 CHARACTER TABULATION |
|
| 425 | + $this->emitToken(array( |
|
| 426 | + 'type' => self::CHARACTR, |
|
| 427 | + 'data' => '</' |
|
| 428 | + )); |
|
| 429 | + |
|
| 430 | + $this->char--; |
|
| 431 | + $this->state = 'data'; |
|
| 432 | + |
|
| 433 | + } else { |
|
| 434 | + /* Parse error. Switch to the bogus comment state. */ |
|
| 435 | + $this->state = 'bogusComment'; |
|
| 436 | + } |
|
| 437 | + } |
|
| 438 | + } |
|
| 439 | + |
|
| 440 | + private function tagNameState() { |
|
| 441 | + // Consume the next input character: |
|
| 442 | + $this->char++; |
|
| 443 | + $char = $this->character($this->char); |
|
| 444 | + |
|
| 445 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 446 | + /* U+0009 CHARACTER TABULATION |
|
| 447 | 447 | U+000A LINE FEED (LF) |
| 448 | 448 | U+000B LINE TABULATION |
| 449 | 449 | U+000C FORM FEED (FF) |
| 450 | 450 | U+0020 SPACE |
| 451 | 451 | Switch to the before attribute name state. */ |
| 452 | - $this->state = 'beforeAttributeName'; |
|
| 452 | + $this->state = 'beforeAttributeName'; |
|
| 453 | 453 | |
| 454 | - } elseif($char === '>') { |
|
| 455 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 454 | + } elseif($char === '>') { |
|
| 455 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 456 | 456 | Emit the current tag token. Switch to the data state. */ |
| 457 | - $this->emitToken($this->token); |
|
| 458 | - $this->state = 'data'; |
|
| 457 | + $this->emitToken($this->token); |
|
| 458 | + $this->state = 'data'; |
|
| 459 | 459 | |
| 460 | - } elseif($this->char === $this->EOF) { |
|
| 461 | - /* EOF |
|
| 460 | + } elseif($this->char === $this->EOF) { |
|
| 461 | + /* EOF |
|
| 462 | 462 | Parse error. Emit the current tag token. Reconsume the EOF |
| 463 | 463 | character in the data state. */ |
| 464 | - $this->emitToken($this->token); |
|
| 464 | + $this->emitToken($this->token); |
|
| 465 | 465 | |
| 466 | - $this->char--; |
|
| 467 | - $this->state = 'data'; |
|
| 466 | + $this->char--; |
|
| 467 | + $this->state = 'data'; |
|
| 468 | 468 | |
| 469 | - } elseif($char === '/') { |
|
| 470 | - /* U+002F SOLIDUS (/) |
|
| 469 | + } elseif($char === '/') { |
|
| 470 | + /* U+002F SOLIDUS (/) |
|
| 471 | 471 | Parse error unless this is a permitted slash. Switch to the before |
| 472 | 472 | attribute name state. */ |
| 473 | - $this->state = 'beforeAttributeName'; |
|
| 473 | + $this->state = 'beforeAttributeName'; |
|
| 474 | 474 | |
| 475 | - } else { |
|
| 476 | - /* Anything else |
|
| 475 | + } else { |
|
| 476 | + /* Anything else |
|
| 477 | 477 | Append the current input character to the current tag token's tag name. |
| 478 | 478 | Stay in the tag name state. */ |
| 479 | - $this->token['name'] .= strtolower($char); |
|
| 480 | - $this->state = 'tagName'; |
|
| 481 | - } |
|
| 482 | - } |
|
| 483 | - |
|
| 484 | - private function beforeAttributeNameState() { |
|
| 485 | - // Consume the next input character: |
|
| 486 | - $this->char++; |
|
| 487 | - $char = $this->character($this->char); |
|
| 488 | - |
|
| 489 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 490 | - /* U+0009 CHARACTER TABULATION |
|
| 479 | + $this->token['name'] .= strtolower($char); |
|
| 480 | + $this->state = 'tagName'; |
|
| 481 | + } |
|
| 482 | + } |
|
| 483 | + |
|
| 484 | + private function beforeAttributeNameState() { |
|
| 485 | + // Consume the next input character: |
|
| 486 | + $this->char++; |
|
| 487 | + $char = $this->character($this->char); |
|
| 488 | + |
|
| 489 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 490 | + /* U+0009 CHARACTER TABULATION |
|
| 491 | 491 | U+000A LINE FEED (LF) |
| 492 | 492 | U+000B LINE TABULATION |
| 493 | 493 | U+000C FORM FEED (FF) |
| 494 | 494 | U+0020 SPACE |
| 495 | 495 | Stay in the before attribute name state. */ |
| 496 | - $this->state = 'beforeAttributeName'; |
|
| 496 | + $this->state = 'beforeAttributeName'; |
|
| 497 | 497 | |
| 498 | - } elseif($char === '>') { |
|
| 499 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 498 | + } elseif($char === '>') { |
|
| 499 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 500 | 500 | Emit the current tag token. Switch to the data state. */ |
| 501 | - $this->emitToken($this->token); |
|
| 502 | - $this->state = 'data'; |
|
| 501 | + $this->emitToken($this->token); |
|
| 502 | + $this->state = 'data'; |
|
| 503 | 503 | |
| 504 | - } elseif($char === '/') { |
|
| 505 | - /* U+002F SOLIDUS (/) |
|
| 504 | + } elseif($char === '/') { |
|
| 505 | + /* U+002F SOLIDUS (/) |
|
| 506 | 506 | Parse error unless this is a permitted slash. Stay in the before |
| 507 | 507 | attribute name state. */ |
| 508 | - $this->state = 'beforeAttributeName'; |
|
| 508 | + $this->state = 'beforeAttributeName'; |
|
| 509 | 509 | |
| 510 | - } elseif($this->char === $this->EOF) { |
|
| 511 | - /* EOF |
|
| 510 | + } elseif($this->char === $this->EOF) { |
|
| 511 | + /* EOF |
|
| 512 | 512 | Parse error. Emit the current tag token. Reconsume the EOF |
| 513 | 513 | character in the data state. */ |
| 514 | - $this->emitToken($this->token); |
|
| 514 | + $this->emitToken($this->token); |
|
| 515 | 515 | |
| 516 | - $this->char--; |
|
| 517 | - $this->state = 'data'; |
|
| 516 | + $this->char--; |
|
| 517 | + $this->state = 'data'; |
|
| 518 | 518 | |
| 519 | - } else { |
|
| 520 | - /* Anything else |
|
| 519 | + } else { |
|
| 520 | + /* Anything else |
|
| 521 | 521 | Start a new attribute in the current tag token. Set that attribute's |
| 522 | 522 | name to the current input character, and its value to the empty string. |
| 523 | 523 | Switch to the attribute name state. */ |
| 524 | - $this->token['attr'][] = array( |
|
| 525 | - 'name' => strtolower($char), |
|
| 526 | - 'value' => null |
|
| 527 | - ); |
|
| 528 | - |
|
| 529 | - $this->state = 'attributeName'; |
|
| 530 | - } |
|
| 531 | - } |
|
| 532 | - |
|
| 533 | - private function attributeNameState() { |
|
| 534 | - // Consume the next input character: |
|
| 535 | - $this->char++; |
|
| 536 | - $char = $this->character($this->char); |
|
| 537 | - |
|
| 538 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 539 | - /* U+0009 CHARACTER TABULATION |
|
| 524 | + $this->token['attr'][] = array( |
|
| 525 | + 'name' => strtolower($char), |
|
| 526 | + 'value' => null |
|
| 527 | + ); |
|
| 528 | + |
|
| 529 | + $this->state = 'attributeName'; |
|
| 530 | + } |
|
| 531 | + } |
|
| 532 | + |
|
| 533 | + private function attributeNameState() { |
|
| 534 | + // Consume the next input character: |
|
| 535 | + $this->char++; |
|
| 536 | + $char = $this->character($this->char); |
|
| 537 | + |
|
| 538 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 539 | + /* U+0009 CHARACTER TABULATION |
|
| 540 | 540 | U+000A LINE FEED (LF) |
| 541 | 541 | U+000B LINE TABULATION |
| 542 | 542 | U+000C FORM FEED (FF) |
| 543 | 543 | U+0020 SPACE |
| 544 | 544 | Stay in the before attribute name state. */ |
| 545 | - $this->state = 'afterAttributeName'; |
|
| 545 | + $this->state = 'afterAttributeName'; |
|
| 546 | 546 | |
| 547 | - } elseif($char === '=') { |
|
| 548 | - /* U+003D EQUALS SIGN (=) |
|
| 547 | + } elseif($char === '=') { |
|
| 548 | + /* U+003D EQUALS SIGN (=) |
|
| 549 | 549 | Switch to the before attribute value state. */ |
| 550 | - $this->state = 'beforeAttributeValue'; |
|
| 550 | + $this->state = 'beforeAttributeValue'; |
|
| 551 | 551 | |
| 552 | - } elseif($char === '>') { |
|
| 553 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 552 | + } elseif($char === '>') { |
|
| 553 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 554 | 554 | Emit the current tag token. Switch to the data state. */ |
| 555 | - $this->emitToken($this->token); |
|
| 556 | - $this->state = 'data'; |
|
| 555 | + $this->emitToken($this->token); |
|
| 556 | + $this->state = 'data'; |
|
| 557 | 557 | |
| 558 | - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 559 | - /* U+002F SOLIDUS (/) |
|
| 558 | + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 559 | + /* U+002F SOLIDUS (/) |
|
| 560 | 560 | Parse error unless this is a permitted slash. Switch to the before |
| 561 | 561 | attribute name state. */ |
| 562 | - $this->state = 'beforeAttributeName'; |
|
| 562 | + $this->state = 'beforeAttributeName'; |
|
| 563 | 563 | |
| 564 | - } elseif($this->char === $this->EOF) { |
|
| 565 | - /* EOF |
|
| 564 | + } elseif($this->char === $this->EOF) { |
|
| 565 | + /* EOF |
|
| 566 | 566 | Parse error. Emit the current tag token. Reconsume the EOF |
| 567 | 567 | character in the data state. */ |
| 568 | - $this->emitToken($this->token); |
|
| 568 | + $this->emitToken($this->token); |
|
| 569 | 569 | |
| 570 | - $this->char--; |
|
| 571 | - $this->state = 'data'; |
|
| 570 | + $this->char--; |
|
| 571 | + $this->state = 'data'; |
|
| 572 | 572 | |
| 573 | - } else { |
|
| 574 | - /* Anything else |
|
| 573 | + } else { |
|
| 574 | + /* Anything else |
|
| 575 | 575 | Append the current input character to the current attribute's name. |
| 576 | 576 | Stay in the attribute name state. */ |
| 577 | - $last = count($this->token['attr']) - 1; |
|
| 578 | - $this->token['attr'][$last]['name'] .= strtolower($char); |
|
| 577 | + $last = count($this->token['attr']) - 1; |
|
| 578 | + $this->token['attr'][$last]['name'] .= strtolower($char); |
|
| 579 | 579 | |
| 580 | - $this->state = 'attributeName'; |
|
| 581 | - } |
|
| 582 | - } |
|
| 580 | + $this->state = 'attributeName'; |
|
| 581 | + } |
|
| 582 | + } |
|
| 583 | 583 | |
| 584 | - private function afterAttributeNameState() { |
|
| 585 | - // Consume the next input character: |
|
| 586 | - $this->char++; |
|
| 587 | - $char = $this->character($this->char); |
|
| 584 | + private function afterAttributeNameState() { |
|
| 585 | + // Consume the next input character: |
|
| 586 | + $this->char++; |
|
| 587 | + $char = $this->character($this->char); |
|
| 588 | 588 | |
| 589 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 590 | - /* U+0009 CHARACTER TABULATION |
|
| 589 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 590 | + /* U+0009 CHARACTER TABULATION |
|
| 591 | 591 | U+000A LINE FEED (LF) |
| 592 | 592 | U+000B LINE TABULATION |
| 593 | 593 | U+000C FORM FEED (FF) |
| 594 | 594 | U+0020 SPACE |
| 595 | 595 | Stay in the after attribute name state. */ |
| 596 | - $this->state = 'afterAttributeName'; |
|
| 596 | + $this->state = 'afterAttributeName'; |
|
| 597 | 597 | |
| 598 | - } elseif($char === '=') { |
|
| 599 | - /* U+003D EQUALS SIGN (=) |
|
| 598 | + } elseif($char === '=') { |
|
| 599 | + /* U+003D EQUALS SIGN (=) |
|
| 600 | 600 | Switch to the before attribute value state. */ |
| 601 | - $this->state = 'beforeAttributeValue'; |
|
| 601 | + $this->state = 'beforeAttributeValue'; |
|
| 602 | 602 | |
| 603 | - } elseif($char === '>') { |
|
| 604 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 603 | + } elseif($char === '>') { |
|
| 604 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 605 | 605 | Emit the current tag token. Switch to the data state. */ |
| 606 | - $this->emitToken($this->token); |
|
| 607 | - $this->state = 'data'; |
|
| 606 | + $this->emitToken($this->token); |
|
| 607 | + $this->state = 'data'; |
|
| 608 | 608 | |
| 609 | - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 610 | - /* U+002F SOLIDUS (/) |
|
| 609 | + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 610 | + /* U+002F SOLIDUS (/) |
|
| 611 | 611 | Parse error unless this is a permitted slash. Switch to the |
| 612 | 612 | before attribute name state. */ |
| 613 | - $this->state = 'beforeAttributeName'; |
|
| 613 | + $this->state = 'beforeAttributeName'; |
|
| 614 | 614 | |
| 615 | - } elseif($this->char === $this->EOF) { |
|
| 616 | - /* EOF |
|
| 615 | + } elseif($this->char === $this->EOF) { |
|
| 616 | + /* EOF |
|
| 617 | 617 | Parse error. Emit the current tag token. Reconsume the EOF |
| 618 | 618 | character in the data state. */ |
| 619 | - $this->emitToken($this->token); |
|
| 619 | + $this->emitToken($this->token); |
|
| 620 | 620 | |
| 621 | - $this->char--; |
|
| 622 | - $this->state = 'data'; |
|
| 621 | + $this->char--; |
|
| 622 | + $this->state = 'data'; |
|
| 623 | 623 | |
| 624 | - } else { |
|
| 625 | - /* Anything else |
|
| 624 | + } else { |
|
| 625 | + /* Anything else |
|
| 626 | 626 | Start a new attribute in the current tag token. Set that attribute's |
| 627 | 627 | name to the current input character, and its value to the empty string. |
| 628 | 628 | Switch to the attribute name state. */ |
| 629 | - $this->token['attr'][] = array( |
|
| 630 | - 'name' => strtolower($char), |
|
| 631 | - 'value' => null |
|
| 632 | - ); |
|
| 633 | - |
|
| 634 | - $this->state = 'attributeName'; |
|
| 635 | - } |
|
| 636 | - } |
|
| 637 | - |
|
| 638 | - private function beforeAttributeValueState() { |
|
| 639 | - // Consume the next input character: |
|
| 640 | - $this->char++; |
|
| 641 | - $char = $this->character($this->char); |
|
| 642 | - |
|
| 643 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 644 | - /* U+0009 CHARACTER TABULATION |
|
| 629 | + $this->token['attr'][] = array( |
|
| 630 | + 'name' => strtolower($char), |
|
| 631 | + 'value' => null |
|
| 632 | + ); |
|
| 633 | + |
|
| 634 | + $this->state = 'attributeName'; |
|
| 635 | + } |
|
| 636 | + } |
|
| 637 | + |
|
| 638 | + private function beforeAttributeValueState() { |
|
| 639 | + // Consume the next input character: |
|
| 640 | + $this->char++; |
|
| 641 | + $char = $this->character($this->char); |
|
| 642 | + |
|
| 643 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 644 | + /* U+0009 CHARACTER TABULATION |
|
| 645 | 645 | U+000A LINE FEED (LF) |
| 646 | 646 | U+000B LINE TABULATION |
| 647 | 647 | U+000C FORM FEED (FF) |
| 648 | 648 | U+0020 SPACE |
| 649 | 649 | Stay in the before attribute value state. */ |
| 650 | - $this->state = 'beforeAttributeValue'; |
|
| 650 | + $this->state = 'beforeAttributeValue'; |
|
| 651 | 651 | |
| 652 | - } elseif($char === '"') { |
|
| 653 | - /* U+0022 QUOTATION MARK (") |
|
| 652 | + } elseif($char === '"') { |
|
| 653 | + /* U+0022 QUOTATION MARK (") |
|
| 654 | 654 | Switch to the attribute value (double-quoted) state. */ |
| 655 | - $this->state = 'attributeValueDoubleQuoted'; |
|
| 655 | + $this->state = 'attributeValueDoubleQuoted'; |
|
| 656 | 656 | |
| 657 | - } elseif($char === '&') { |
|
| 658 | - /* U+0026 AMPERSAND (&) |
|
| 657 | + } elseif($char === '&') { |
|
| 658 | + /* U+0026 AMPERSAND (&) |
|
| 659 | 659 | Switch to the attribute value (unquoted) state and reconsume |
| 660 | 660 | this input character. */ |
| 661 | - $this->char--; |
|
| 662 | - $this->state = 'attributeValueUnquoted'; |
|
| 661 | + $this->char--; |
|
| 662 | + $this->state = 'attributeValueUnquoted'; |
|
| 663 | 663 | |
| 664 | - } elseif($char === '\'') { |
|
| 665 | - /* U+0027 APOSTROPHE (') |
|
| 664 | + } elseif($char === '\'') { |
|
| 665 | + /* U+0027 APOSTROPHE (') |
|
| 666 | 666 | Switch to the attribute value (single-quoted) state. */ |
| 667 | - $this->state = 'attributeValueSingleQuoted'; |
|
| 667 | + $this->state = 'attributeValueSingleQuoted'; |
|
| 668 | 668 | |
| 669 | - } elseif($char === '>') { |
|
| 670 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 669 | + } elseif($char === '>') { |
|
| 670 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 671 | 671 | Emit the current tag token. Switch to the data state. */ |
| 672 | - $this->emitToken($this->token); |
|
| 673 | - $this->state = 'data'; |
|
| 672 | + $this->emitToken($this->token); |
|
| 673 | + $this->state = 'data'; |
|
| 674 | 674 | |
| 675 | - } else { |
|
| 676 | - /* Anything else |
|
| 675 | + } else { |
|
| 676 | + /* Anything else |
|
| 677 | 677 | Append the current input character to the current attribute's value. |
| 678 | 678 | Switch to the attribute value (unquoted) state. */ |
| 679 | - $last = count($this->token['attr']) - 1; |
|
| 680 | - $this->token['attr'][$last]['value'] .= $char; |
|
| 679 | + $last = count($this->token['attr']) - 1; |
|
| 680 | + $this->token['attr'][$last]['value'] .= $char; |
|
| 681 | 681 | |
| 682 | - $this->state = 'attributeValueUnquoted'; |
|
| 683 | - } |
|
| 684 | - } |
|
| 682 | + $this->state = 'attributeValueUnquoted'; |
|
| 683 | + } |
|
| 684 | + } |
|
| 685 | 685 | |
| 686 | - private function attributeValueDoubleQuotedState() { |
|
| 687 | - // Consume the next input character: |
|
| 688 | - $this->char++; |
|
| 689 | - $char = $this->character($this->char); |
|
| 686 | + private function attributeValueDoubleQuotedState() { |
|
| 687 | + // Consume the next input character: |
|
| 688 | + $this->char++; |
|
| 689 | + $char = $this->character($this->char); |
|
| 690 | 690 | |
| 691 | - if($char === '"') { |
|
| 692 | - /* U+0022 QUOTATION MARK (") |
|
| 691 | + if($char === '"') { |
|
| 692 | + /* U+0022 QUOTATION MARK (") |
|
| 693 | 693 | Switch to the before attribute name state. */ |
| 694 | - $this->state = 'beforeAttributeName'; |
|
| 694 | + $this->state = 'beforeAttributeName'; |
|
| 695 | 695 | |
| 696 | - } elseif($char === '&') { |
|
| 697 | - /* U+0026 AMPERSAND (&) |
|
| 696 | + } elseif($char === '&') { |
|
| 697 | + /* U+0026 AMPERSAND (&) |
|
| 698 | 698 | Switch to the entity in attribute value state. */ |
| 699 | - $this->entityInAttributeValueState('double'); |
|
| 699 | + $this->entityInAttributeValueState('double'); |
|
| 700 | 700 | |
| 701 | - } elseif($this->char === $this->EOF) { |
|
| 702 | - /* EOF |
|
| 701 | + } elseif($this->char === $this->EOF) { |
|
| 702 | + /* EOF |
|
| 703 | 703 | Parse error. Emit the current tag token. Reconsume the character |
| 704 | 704 | in the data state. */ |
| 705 | - $this->emitToken($this->token); |
|
| 705 | + $this->emitToken($this->token); |
|
| 706 | 706 | |
| 707 | - $this->char--; |
|
| 708 | - $this->state = 'data'; |
|
| 707 | + $this->char--; |
|
| 708 | + $this->state = 'data'; |
|
| 709 | 709 | |
| 710 | - } else { |
|
| 711 | - /* Anything else |
|
| 710 | + } else { |
|
| 711 | + /* Anything else |
|
| 712 | 712 | Append the current input character to the current attribute's value. |
| 713 | 713 | Stay in the attribute value (double-quoted) state. */ |
| 714 | - $last = count($this->token['attr']) - 1; |
|
| 715 | - $this->token['attr'][$last]['value'] .= $char; |
|
| 714 | + $last = count($this->token['attr']) - 1; |
|
| 715 | + $this->token['attr'][$last]['value'] .= $char; |
|
| 716 | 716 | |
| 717 | - $this->state = 'attributeValueDoubleQuoted'; |
|
| 718 | - } |
|
| 719 | - } |
|
| 717 | + $this->state = 'attributeValueDoubleQuoted'; |
|
| 718 | + } |
|
| 719 | + } |
|
| 720 | 720 | |
| 721 | - private function attributeValueSingleQuotedState() { |
|
| 722 | - // Consume the next input character: |
|
| 723 | - $this->char++; |
|
| 724 | - $char = $this->character($this->char); |
|
| 721 | + private function attributeValueSingleQuotedState() { |
|
| 722 | + // Consume the next input character: |
|
| 723 | + $this->char++; |
|
| 724 | + $char = $this->character($this->char); |
|
| 725 | 725 | |
| 726 | - if($char === '\'') { |
|
| 727 | - /* U+0022 QUOTATION MARK (') |
|
| 726 | + if($char === '\'') { |
|
| 727 | + /* U+0022 QUOTATION MARK (') |
|
| 728 | 728 | Switch to the before attribute name state. */ |
| 729 | - $this->state = 'beforeAttributeName'; |
|
| 729 | + $this->state = 'beforeAttributeName'; |
|
| 730 | 730 | |
| 731 | - } elseif($char === '&') { |
|
| 732 | - /* U+0026 AMPERSAND (&) |
|
| 731 | + } elseif($char === '&') { |
|
| 732 | + /* U+0026 AMPERSAND (&) |
|
| 733 | 733 | Switch to the entity in attribute value state. */ |
| 734 | - $this->entityInAttributeValueState('single'); |
|
| 734 | + $this->entityInAttributeValueState('single'); |
|
| 735 | 735 | |
| 736 | - } elseif($this->char === $this->EOF) { |
|
| 737 | - /* EOF |
|
| 736 | + } elseif($this->char === $this->EOF) { |
|
| 737 | + /* EOF |
|
| 738 | 738 | Parse error. Emit the current tag token. Reconsume the character |
| 739 | 739 | in the data state. */ |
| 740 | - $this->emitToken($this->token); |
|
| 740 | + $this->emitToken($this->token); |
|
| 741 | 741 | |
| 742 | - $this->char--; |
|
| 743 | - $this->state = 'data'; |
|
| 742 | + $this->char--; |
|
| 743 | + $this->state = 'data'; |
|
| 744 | 744 | |
| 745 | - } else { |
|
| 746 | - /* Anything else |
|
| 745 | + } else { |
|
| 746 | + /* Anything else |
|
| 747 | 747 | Append the current input character to the current attribute's value. |
| 748 | 748 | Stay in the attribute value (single-quoted) state. */ |
| 749 | - $last = count($this->token['attr']) - 1; |
|
| 750 | - $this->token['attr'][$last]['value'] .= $char; |
|
| 749 | + $last = count($this->token['attr']) - 1; |
|
| 750 | + $this->token['attr'][$last]['value'] .= $char; |
|
| 751 | 751 | |
| 752 | - $this->state = 'attributeValueSingleQuoted'; |
|
| 753 | - } |
|
| 754 | - } |
|
| 752 | + $this->state = 'attributeValueSingleQuoted'; |
|
| 753 | + } |
|
| 754 | + } |
|
| 755 | 755 | |
| 756 | - private function attributeValueUnquotedState() { |
|
| 757 | - // Consume the next input character: |
|
| 758 | - $this->char++; |
|
| 759 | - $char = $this->character($this->char); |
|
| 756 | + private function attributeValueUnquotedState() { |
|
| 757 | + // Consume the next input character: |
|
| 758 | + $this->char++; |
|
| 759 | + $char = $this->character($this->char); |
|
| 760 | 760 | |
| 761 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 762 | - /* U+0009 CHARACTER TABULATION |
|
| 761 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 762 | + /* U+0009 CHARACTER TABULATION |
|
| 763 | 763 | U+000A LINE FEED (LF) |
| 764 | 764 | U+000B LINE TABULATION |
| 765 | 765 | U+000C FORM FEED (FF) |
| 766 | 766 | U+0020 SPACE |
| 767 | 767 | Switch to the before attribute name state. */ |
| 768 | - $this->state = 'beforeAttributeName'; |
|
| 768 | + $this->state = 'beforeAttributeName'; |
|
| 769 | 769 | |
| 770 | - } elseif($char === '&') { |
|
| 771 | - /* U+0026 AMPERSAND (&) |
|
| 770 | + } elseif($char === '&') { |
|
| 771 | + /* U+0026 AMPERSAND (&) |
|
| 772 | 772 | Switch to the entity in attribute value state. */ |
| 773 | - $this->entityInAttributeValueState(); |
|
| 773 | + $this->entityInAttributeValueState(); |
|
| 774 | 774 | |
| 775 | - } elseif($char === '>') { |
|
| 776 | - /* U+003E GREATER-THAN SIGN (>) |
|
| 775 | + } elseif($char === '>') { |
|
| 776 | + /* U+003E GREATER-THAN SIGN (>) |
|
| 777 | 777 | Emit the current tag token. Switch to the data state. */ |
| 778 | - $this->emitToken($this->token); |
|
| 779 | - $this->state = 'data'; |
|
| 778 | + $this->emitToken($this->token); |
|
| 779 | + $this->state = 'data'; |
|
| 780 | 780 | |
| 781 | - } else { |
|
| 782 | - /* Anything else |
|
| 781 | + } else { |
|
| 782 | + /* Anything else |
|
| 783 | 783 | Append the current input character to the current attribute's value. |
| 784 | 784 | Stay in the attribute value (unquoted) state. */ |
| 785 | - $last = count($this->token['attr']) - 1; |
|
| 786 | - $this->token['attr'][$last]['value'] .= $char; |
|
| 787 | - |
|
| 788 | - $this->state = 'attributeValueUnquoted'; |
|
| 789 | - } |
|
| 790 | - } |
|
| 791 | - |
|
| 792 | - private function entityInAttributeValueState() { |
|
| 793 | - // Attempt to consume an entity. |
|
| 794 | - $entity = $this->entity(); |
|
| 795 | - |
|
| 796 | - // If nothing is returned, append a U+0026 AMPERSAND character to the |
|
| 797 | - // current attribute's value. Otherwise, emit the character token that |
|
| 798 | - // was returned. |
|
| 799 | - $char = (!$entity) |
|
| 800 | - ? '&' |
|
| 801 | - : $entity; |
|
| 802 | - |
|
| 803 | - $last = count($this->token['attr']) - 1; |
|
| 804 | - $this->token['attr'][$last]['value'] .= $char; |
|
| 805 | - } |
|
| 806 | - |
|
| 807 | - private function bogusCommentState() { |
|
| 808 | - /* Consume every character up to the first U+003E GREATER-THAN SIGN |
|
| 785 | + $last = count($this->token['attr']) - 1; |
|
| 786 | + $this->token['attr'][$last]['value'] .= $char; |
|
| 787 | + |
|
| 788 | + $this->state = 'attributeValueUnquoted'; |
|
| 789 | + } |
|
| 790 | + } |
|
| 791 | + |
|
| 792 | + private function entityInAttributeValueState() { |
|
| 793 | + // Attempt to consume an entity. |
|
| 794 | + $entity = $this->entity(); |
|
| 795 | + |
|
| 796 | + // If nothing is returned, append a U+0026 AMPERSAND character to the |
|
| 797 | + // current attribute's value. Otherwise, emit the character token that |
|
| 798 | + // was returned. |
|
| 799 | + $char = (!$entity) |
|
| 800 | + ? '&' |
|
| 801 | + : $entity; |
|
| 802 | + |
|
| 803 | + $last = count($this->token['attr']) - 1; |
|
| 804 | + $this->token['attr'][$last]['value'] .= $char; |
|
| 805 | + } |
|
| 806 | + |
|
| 807 | + private function bogusCommentState() { |
|
| 808 | + /* Consume every character up to the first U+003E GREATER-THAN SIGN |
|
| 809 | 809 | character (>) or the end of the file (EOF), whichever comes first. Emit |
| 810 | 810 | a comment token whose data is the concatenation of all the characters |
| 811 | 811 | starting from and including the character that caused the state machine |
@@ -813,446 +813,446 @@ discard block |
||
| 813 | 813 | consumed character before the U+003E character, if any, or up to the |
| 814 | 814 | end of the file otherwise. (If the comment was started by the end of |
| 815 | 815 | the file (EOF), the token is empty.) */ |
| 816 | - $data = $this->characters('^>', $this->char); |
|
| 817 | - $this->emitToken(array( |
|
| 818 | - 'data' => $data, |
|
| 819 | - 'type' => self::COMMENT |
|
| 820 | - )); |
|
| 816 | + $data = $this->characters('^>', $this->char); |
|
| 817 | + $this->emitToken(array( |
|
| 818 | + 'data' => $data, |
|
| 819 | + 'type' => self::COMMENT |
|
| 820 | + )); |
|
| 821 | 821 | |
| 822 | - $this->char += strlen($data); |
|
| 822 | + $this->char += strlen($data); |
|
| 823 | 823 | |
| 824 | - /* Switch to the data state. */ |
|
| 825 | - $this->state = 'data'; |
|
| 824 | + /* Switch to the data state. */ |
|
| 825 | + $this->state = 'data'; |
|
| 826 | 826 | |
| 827 | - /* If the end of the file was reached, reconsume the EOF character. */ |
|
| 828 | - if($this->char === $this->EOF) { |
|
| 829 | - $this->char = $this->EOF - 1; |
|
| 830 | - } |
|
| 831 | - } |
|
| 827 | + /* If the end of the file was reached, reconsume the EOF character. */ |
|
| 828 | + if($this->char === $this->EOF) { |
|
| 829 | + $this->char = $this->EOF - 1; |
|
| 830 | + } |
|
| 831 | + } |
|
| 832 | 832 | |
| 833 | - private function markupDeclarationOpenState() { |
|
| 834 | - /* If the next two characters are both U+002D HYPHEN-MINUS (-) |
|
| 833 | + private function markupDeclarationOpenState() { |
|
| 834 | + /* If the next two characters are both U+002D HYPHEN-MINUS (-) |
|
| 835 | 835 | characters, consume those two characters, create a comment token whose |
| 836 | 836 | data is the empty string, and switch to the comment state. */ |
| 837 | - if($this->character($this->char + 1, 2) === '--') { |
|
| 838 | - $this->char += 2; |
|
| 839 | - $this->state = 'comment'; |
|
| 840 | - $this->token = array( |
|
| 841 | - 'data' => null, |
|
| 842 | - 'type' => self::COMMENT |
|
| 843 | - ); |
|
| 844 | - |
|
| 845 | - /* Otherwise if the next seven characters are a case-insensitive match |
|
| 837 | + if($this->character($this->char + 1, 2) === '--') { |
|
| 838 | + $this->char += 2; |
|
| 839 | + $this->state = 'comment'; |
|
| 840 | + $this->token = array( |
|
| 841 | + 'data' => null, |
|
| 842 | + 'type' => self::COMMENT |
|
| 843 | + ); |
|
| 844 | + |
|
| 845 | + /* Otherwise if the next seven characters are a case-insensitive match |
|
| 846 | 846 | for the word "DOCTYPE", then consume those characters and switch to the |
| 847 | 847 | DOCTYPE state. */ |
| 848 | - } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { |
|
| 849 | - $this->char += 7; |
|
| 850 | - $this->state = 'doctype'; |
|
| 848 | + } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { |
|
| 849 | + $this->char += 7; |
|
| 850 | + $this->state = 'doctype'; |
|
| 851 | 851 | |
| 852 | - /* Otherwise, it is a parse error. Switch to the bogus comment state. |
|
| 852 | + /* Otherwise, it is a parse error. Switch to the bogus comment state. |
|
| 853 | 853 | The next character that is consumed, if any, is the first character |
| 854 | 854 | that will be in the comment. */ |
| 855 | - } else { |
|
| 856 | - $this->char++; |
|
| 857 | - $this->state = 'bogusComment'; |
|
| 858 | - } |
|
| 859 | - } |
|
| 860 | - |
|
| 861 | - private function commentState() { |
|
| 862 | - /* Consume the next input character: */ |
|
| 863 | - $this->char++; |
|
| 864 | - $char = $this->char(); |
|
| 865 | - |
|
| 866 | - /* U+002D HYPHEN-MINUS (-) */ |
|
| 867 | - if($char === '-') { |
|
| 868 | - /* Switch to the comment dash state */ |
|
| 869 | - $this->state = 'commentDash'; |
|
| 870 | - |
|
| 871 | - /* EOF */ |
|
| 872 | - } elseif($this->char === $this->EOF) { |
|
| 873 | - /* Parse error. Emit the comment token. Reconsume the EOF character |
|
| 855 | + } else { |
|
| 856 | + $this->char++; |
|
| 857 | + $this->state = 'bogusComment'; |
|
| 858 | + } |
|
| 859 | + } |
|
| 860 | + |
|
| 861 | + private function commentState() { |
|
| 862 | + /* Consume the next input character: */ |
|
| 863 | + $this->char++; |
|
| 864 | + $char = $this->char(); |
|
| 865 | + |
|
| 866 | + /* U+002D HYPHEN-MINUS (-) */ |
|
| 867 | + if($char === '-') { |
|
| 868 | + /* Switch to the comment dash state */ |
|
| 869 | + $this->state = 'commentDash'; |
|
| 870 | + |
|
| 871 | + /* EOF */ |
|
| 872 | + } elseif($this->char === $this->EOF) { |
|
| 873 | + /* Parse error. Emit the comment token. Reconsume the EOF character |
|
| 874 | 874 | in the data state. */ |
| 875 | - $this->emitToken($this->token); |
|
| 876 | - $this->char--; |
|
| 877 | - $this->state = 'data'; |
|
| 875 | + $this->emitToken($this->token); |
|
| 876 | + $this->char--; |
|
| 877 | + $this->state = 'data'; |
|
| 878 | 878 | |
| 879 | - /* Anything else */ |
|
| 880 | - } else { |
|
| 881 | - /* Append the input character to the comment token's data. Stay in |
|
| 879 | + /* Anything else */ |
|
| 880 | + } else { |
|
| 881 | + /* Append the input character to the comment token's data. Stay in |
|
| 882 | 882 | the comment state. */ |
| 883 | - $this->token['data'] .= $char; |
|
| 884 | - } |
|
| 885 | - } |
|
| 886 | - |
|
| 887 | - private function commentDashState() { |
|
| 888 | - /* Consume the next input character: */ |
|
| 889 | - $this->char++; |
|
| 890 | - $char = $this->char(); |
|
| 891 | - |
|
| 892 | - /* U+002D HYPHEN-MINUS (-) */ |
|
| 893 | - if($char === '-') { |
|
| 894 | - /* Switch to the comment end state */ |
|
| 895 | - $this->state = 'commentEnd'; |
|
| 896 | - |
|
| 897 | - /* EOF */ |
|
| 898 | - } elseif($this->char === $this->EOF) { |
|
| 899 | - /* Parse error. Emit the comment token. Reconsume the EOF character |
|
| 883 | + $this->token['data'] .= $char; |
|
| 884 | + } |
|
| 885 | + } |
|
| 886 | + |
|
| 887 | + private function commentDashState() { |
|
| 888 | + /* Consume the next input character: */ |
|
| 889 | + $this->char++; |
|
| 890 | + $char = $this->char(); |
|
| 891 | + |
|
| 892 | + /* U+002D HYPHEN-MINUS (-) */ |
|
| 893 | + if($char === '-') { |
|
| 894 | + /* Switch to the comment end state */ |
|
| 895 | + $this->state = 'commentEnd'; |
|
| 896 | + |
|
| 897 | + /* EOF */ |
|
| 898 | + } elseif($this->char === $this->EOF) { |
|
| 899 | + /* Parse error. Emit the comment token. Reconsume the EOF character |
|
| 900 | 900 | in the data state. */ |
| 901 | - $this->emitToken($this->token); |
|
| 902 | - $this->char--; |
|
| 903 | - $this->state = 'data'; |
|
| 901 | + $this->emitToken($this->token); |
|
| 902 | + $this->char--; |
|
| 903 | + $this->state = 'data'; |
|
| 904 | 904 | |
| 905 | - /* Anything else */ |
|
| 906 | - } else { |
|
| 907 | - /* Append a U+002D HYPHEN-MINUS (-) character and the input |
|
| 905 | + /* Anything else */ |
|
| 906 | + } else { |
|
| 907 | + /* Append a U+002D HYPHEN-MINUS (-) character and the input |
|
| 908 | 908 | character to the comment token's data. Switch to the comment state. */ |
| 909 | - $this->token['data'] .= '-'.$char; |
|
| 910 | - $this->state = 'comment'; |
|
| 911 | - } |
|
| 912 | - } |
|
| 913 | - |
|
| 914 | - private function commentEndState() { |
|
| 915 | - /* Consume the next input character: */ |
|
| 916 | - $this->char++; |
|
| 917 | - $char = $this->char(); |
|
| 918 | - |
|
| 919 | - if($char === '>') { |
|
| 920 | - $this->emitToken($this->token); |
|
| 921 | - $this->state = 'data'; |
|
| 922 | - |
|
| 923 | - } elseif($char === '-') { |
|
| 924 | - $this->token['data'] .= '-'; |
|
| 925 | - |
|
| 926 | - } elseif($this->char === $this->EOF) { |
|
| 927 | - $this->emitToken($this->token); |
|
| 928 | - $this->char--; |
|
| 929 | - $this->state = 'data'; |
|
| 930 | - |
|
| 931 | - } else { |
|
| 932 | - $this->token['data'] .= '--'.$char; |
|
| 933 | - $this->state = 'comment'; |
|
| 934 | - } |
|
| 935 | - } |
|
| 936 | - |
|
| 937 | - private function doctypeState() { |
|
| 938 | - /* Consume the next input character: */ |
|
| 939 | - $this->char++; |
|
| 940 | - $char = $this->char(); |
|
| 941 | - |
|
| 942 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 943 | - $this->state = 'beforeDoctypeName'; |
|
| 944 | - |
|
| 945 | - } else { |
|
| 946 | - $this->char--; |
|
| 947 | - $this->state = 'beforeDoctypeName'; |
|
| 948 | - } |
|
| 949 | - } |
|
| 950 | - |
|
| 951 | - private function beforeDoctypeNameState() { |
|
| 952 | - /* Consume the next input character: */ |
|
| 953 | - $this->char++; |
|
| 954 | - $char = $this->char(); |
|
| 955 | - |
|
| 956 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 957 | - // Stay in the before DOCTYPE name state. |
|
| 958 | - |
|
| 959 | - } elseif(preg_match('/^[a-z]$/', $char)) { |
|
| 960 | - $this->token = array( |
|
| 961 | - 'name' => strtoupper($char), |
|
| 962 | - 'type' => self::DOCTYPE, |
|
| 963 | - 'error' => true |
|
| 964 | - ); |
|
| 965 | - |
|
| 966 | - $this->state = 'doctypeName'; |
|
| 967 | - |
|
| 968 | - } elseif($char === '>') { |
|
| 969 | - $this->emitToken(array( |
|
| 970 | - 'name' => null, |
|
| 971 | - 'type' => self::DOCTYPE, |
|
| 972 | - 'error' => true |
|
| 973 | - )); |
|
| 974 | - |
|
| 975 | - $this->state = 'data'; |
|
| 976 | - |
|
| 977 | - } elseif($this->char === $this->EOF) { |
|
| 978 | - $this->emitToken(array( |
|
| 979 | - 'name' => null, |
|
| 980 | - 'type' => self::DOCTYPE, |
|
| 981 | - 'error' => true |
|
| 982 | - )); |
|
| 983 | - |
|
| 984 | - $this->char--; |
|
| 985 | - $this->state = 'data'; |
|
| 986 | - |
|
| 987 | - } else { |
|
| 988 | - $this->token = array( |
|
| 989 | - 'name' => $char, |
|
| 990 | - 'type' => self::DOCTYPE, |
|
| 991 | - 'error' => true |
|
| 992 | - ); |
|
| 993 | - |
|
| 994 | - $this->state = 'doctypeName'; |
|
| 995 | - } |
|
| 996 | - } |
|
| 997 | - |
|
| 998 | - private function doctypeNameState() { |
|
| 999 | - /* Consume the next input character: */ |
|
| 1000 | - $this->char++; |
|
| 1001 | - $char = $this->char(); |
|
| 1002 | - |
|
| 1003 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1004 | - $this->state = 'AfterDoctypeName'; |
|
| 1005 | - |
|
| 1006 | - } elseif($char === '>') { |
|
| 1007 | - $this->emitToken($this->token); |
|
| 1008 | - $this->state = 'data'; |
|
| 1009 | - |
|
| 1010 | - } elseif(preg_match('/^[a-z]$/', $char)) { |
|
| 1011 | - $this->token['name'] .= strtoupper($char); |
|
| 1012 | - |
|
| 1013 | - } elseif($this->char === $this->EOF) { |
|
| 1014 | - $this->emitToken($this->token); |
|
| 1015 | - $this->char--; |
|
| 1016 | - $this->state = 'data'; |
|
| 1017 | - |
|
| 1018 | - } else { |
|
| 1019 | - $this->token['name'] .= $char; |
|
| 1020 | - } |
|
| 1021 | - |
|
| 1022 | - $this->token['error'] = ($this->token['name'] === 'HTML') |
|
| 1023 | - ? false |
|
| 1024 | - : true; |
|
| 1025 | - } |
|
| 1026 | - |
|
| 1027 | - private function afterDoctypeNameState() { |
|
| 1028 | - /* Consume the next input character: */ |
|
| 1029 | - $this->char++; |
|
| 1030 | - $char = $this->char(); |
|
| 1031 | - |
|
| 1032 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1033 | - // Stay in the after DOCTYPE name state. |
|
| 1034 | - |
|
| 1035 | - } elseif($char === '>') { |
|
| 1036 | - $this->emitToken($this->token); |
|
| 1037 | - $this->state = 'data'; |
|
| 1038 | - |
|
| 1039 | - } elseif($this->char === $this->EOF) { |
|
| 1040 | - $this->emitToken($this->token); |
|
| 1041 | - $this->char--; |
|
| 1042 | - $this->state = 'data'; |
|
| 1043 | - |
|
| 1044 | - } else { |
|
| 1045 | - $this->token['error'] = true; |
|
| 1046 | - $this->state = 'bogusDoctype'; |
|
| 1047 | - } |
|
| 1048 | - } |
|
| 1049 | - |
|
| 1050 | - private function bogusDoctypeState() { |
|
| 1051 | - /* Consume the next input character: */ |
|
| 1052 | - $this->char++; |
|
| 1053 | - $char = $this->char(); |
|
| 1054 | - |
|
| 1055 | - if($char === '>') { |
|
| 1056 | - $this->emitToken($this->token); |
|
| 1057 | - $this->state = 'data'; |
|
| 1058 | - |
|
| 1059 | - } elseif($this->char === $this->EOF) { |
|
| 1060 | - $this->emitToken($this->token); |
|
| 1061 | - $this->char--; |
|
| 1062 | - $this->state = 'data'; |
|
| 1063 | - |
|
| 1064 | - } else { |
|
| 1065 | - // Stay in the bogus DOCTYPE state. |
|
| 1066 | - } |
|
| 1067 | - } |
|
| 1068 | - |
|
| 1069 | - private function entity() { |
|
| 1070 | - $start = $this->char; |
|
| 1071 | - |
|
| 1072 | - // This section defines how to consume an entity. This definition is |
|
| 1073 | - // used when parsing entities in text and in attributes. |
|
| 1074 | - |
|
| 1075 | - // The behaviour depends on the identity of the next character (the |
|
| 1076 | - // one immediately after the U+0026 AMPERSAND character): |
|
| 1077 | - |
|
| 1078 | - switch($this->character($this->char + 1)) { |
|
| 1079 | - // U+0023 NUMBER SIGN (#) |
|
| 1080 | - case '#': |
|
| 1081 | - |
|
| 1082 | - // The behaviour further depends on the character after the |
|
| 1083 | - // U+0023 NUMBER SIGN: |
|
| 1084 | - switch($this->character($this->char + 1)) { |
|
| 1085 | - // U+0078 LATIN SMALL LETTER X |
|
| 1086 | - // U+0058 LATIN CAPITAL LETTER X |
|
| 1087 | - case 'x': |
|
| 1088 | - case 'X': |
|
| 1089 | - // Follow the steps below, but using the range of |
|
| 1090 | - // characters U+0030 DIGIT ZERO through to U+0039 DIGIT |
|
| 1091 | - // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 |
|
| 1092 | - // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER |
|
| 1093 | - // A, through to U+0046 LATIN CAPITAL LETTER F (in other |
|
| 1094 | - // words, 0-9, A-F, a-f). |
|
| 1095 | - $char = 1; |
|
| 1096 | - $char_class = '0-9A-Fa-f'; |
|
| 1097 | - break; |
|
| 1098 | - |
|
| 1099 | - // Anything else |
|
| 1100 | - default: |
|
| 1101 | - // Follow the steps below, but using the range of |
|
| 1102 | - // characters U+0030 DIGIT ZERO through to U+0039 DIGIT |
|
| 1103 | - // NINE (i.e. just 0-9). |
|
| 1104 | - $char = 0; |
|
| 1105 | - $char_class = '0-9'; |
|
| 1106 | - break; |
|
| 1107 | - } |
|
| 1108 | - |
|
| 1109 | - // Consume as many characters as match the range of characters |
|
| 1110 | - // given above. |
|
| 1111 | - $this->char++; |
|
| 1112 | - $e_name = $this->characters($char_class, $this->char + $char + 1); |
|
| 1113 | - $entity = $this->character($start, $this->char); |
|
| 1114 | - $cond = strlen($e_name) > 0; |
|
| 1115 | - |
|
| 1116 | - // The rest of the parsing happens below. |
|
| 1117 | - break; |
|
| 1118 | - |
|
| 1119 | - // Anything else |
|
| 1120 | - default: |
|
| 1121 | - // Consume the maximum number of characters possible, with the |
|
| 1122 | - // consumed characters case-sensitively matching one of the |
|
| 1123 | - // identifiers in the first column of the entities table. |
|
| 1124 | - $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); |
|
| 1125 | - $len = strlen($e_name); |
|
| 1126 | - |
|
| 1127 | - for($c = 1; $c <= $len; $c++) { |
|
| 1128 | - $id = substr($e_name, 0, $c); |
|
| 1129 | - $this->char++; |
|
| 1130 | - |
|
| 1131 | - if(in_array($id, $this->entities)) { |
|
| 1132 | - if ($e_name[$c-1] !== ';') { |
|
| 1133 | - if ($c < $len && $e_name[$c] == ';') { |
|
| 1134 | - $this->char++; // consume extra semicolon |
|
| 1135 | - } |
|
| 1136 | - } |
|
| 1137 | - $entity = $id; |
|
| 1138 | - break; |
|
| 1139 | - } |
|
| 1140 | - } |
|
| 1141 | - |
|
| 1142 | - $cond = isset($entity); |
|
| 1143 | - // The rest of the parsing happens below. |
|
| 1144 | - break; |
|
| 1145 | - } |
|
| 1146 | - |
|
| 1147 | - if(!$cond) { |
|
| 1148 | - // If no match can be made, then this is a parse error. No |
|
| 1149 | - // characters are consumed, and nothing is returned. |
|
| 1150 | - $this->char = $start; |
|
| 1151 | - return false; |
|
| 1152 | - } |
|
| 1153 | - |
|
| 1154 | - // Return a character token for the character corresponding to the |
|
| 1155 | - // entity name (as given by the second column of the entities table). |
|
| 1156 | - return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); |
|
| 1157 | - } |
|
| 1158 | - |
|
| 1159 | - private function emitToken($token) { |
|
| 1160 | - $emit = $this->tree->emitToken($token); |
|
| 1161 | - |
|
| 1162 | - if(is_int($emit)) { |
|
| 1163 | - $this->content_model = $emit; |
|
| 1164 | - |
|
| 1165 | - } elseif($token['type'] === self::ENDTAG) { |
|
| 1166 | - $this->content_model = self::PCDATA; |
|
| 1167 | - } |
|
| 1168 | - } |
|
| 1169 | - |
|
| 1170 | - private function EOF() { |
|
| 1171 | - $this->state = null; |
|
| 1172 | - $this->tree->emitToken(array( |
|
| 1173 | - 'type' => self::EOF |
|
| 1174 | - )); |
|
| 1175 | - } |
|
| 909 | + $this->token['data'] .= '-'.$char; |
|
| 910 | + $this->state = 'comment'; |
|
| 911 | + } |
|
| 912 | + } |
|
| 913 | + |
|
| 914 | + private function commentEndState() { |
|
| 915 | + /* Consume the next input character: */ |
|
| 916 | + $this->char++; |
|
| 917 | + $char = $this->char(); |
|
| 918 | + |
|
| 919 | + if($char === '>') { |
|
| 920 | + $this->emitToken($this->token); |
|
| 921 | + $this->state = 'data'; |
|
| 922 | + |
|
| 923 | + } elseif($char === '-') { |
|
| 924 | + $this->token['data'] .= '-'; |
|
| 925 | + |
|
| 926 | + } elseif($this->char === $this->EOF) { |
|
| 927 | + $this->emitToken($this->token); |
|
| 928 | + $this->char--; |
|
| 929 | + $this->state = 'data'; |
|
| 930 | + |
|
| 931 | + } else { |
|
| 932 | + $this->token['data'] .= '--'.$char; |
|
| 933 | + $this->state = 'comment'; |
|
| 934 | + } |
|
| 935 | + } |
|
| 936 | + |
|
| 937 | + private function doctypeState() { |
|
| 938 | + /* Consume the next input character: */ |
|
| 939 | + $this->char++; |
|
| 940 | + $char = $this->char(); |
|
| 941 | + |
|
| 942 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 943 | + $this->state = 'beforeDoctypeName'; |
|
| 944 | + |
|
| 945 | + } else { |
|
| 946 | + $this->char--; |
|
| 947 | + $this->state = 'beforeDoctypeName'; |
|
| 948 | + } |
|
| 949 | + } |
|
| 950 | + |
|
| 951 | + private function beforeDoctypeNameState() { |
|
| 952 | + /* Consume the next input character: */ |
|
| 953 | + $this->char++; |
|
| 954 | + $char = $this->char(); |
|
| 955 | + |
|
| 956 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 957 | + // Stay in the before DOCTYPE name state. |
|
| 958 | + |
|
| 959 | + } elseif(preg_match('/^[a-z]$/', $char)) { |
|
| 960 | + $this->token = array( |
|
| 961 | + 'name' => strtoupper($char), |
|
| 962 | + 'type' => self::DOCTYPE, |
|
| 963 | + 'error' => true |
|
| 964 | + ); |
|
| 965 | + |
|
| 966 | + $this->state = 'doctypeName'; |
|
| 967 | + |
|
| 968 | + } elseif($char === '>') { |
|
| 969 | + $this->emitToken(array( |
|
| 970 | + 'name' => null, |
|
| 971 | + 'type' => self::DOCTYPE, |
|
| 972 | + 'error' => true |
|
| 973 | + )); |
|
| 974 | + |
|
| 975 | + $this->state = 'data'; |
|
| 976 | + |
|
| 977 | + } elseif($this->char === $this->EOF) { |
|
| 978 | + $this->emitToken(array( |
|
| 979 | + 'name' => null, |
|
| 980 | + 'type' => self::DOCTYPE, |
|
| 981 | + 'error' => true |
|
| 982 | + )); |
|
| 983 | + |
|
| 984 | + $this->char--; |
|
| 985 | + $this->state = 'data'; |
|
| 986 | + |
|
| 987 | + } else { |
|
| 988 | + $this->token = array( |
|
| 989 | + 'name' => $char, |
|
| 990 | + 'type' => self::DOCTYPE, |
|
| 991 | + 'error' => true |
|
| 992 | + ); |
|
| 993 | + |
|
| 994 | + $this->state = 'doctypeName'; |
|
| 995 | + } |
|
| 996 | + } |
|
| 997 | + |
|
| 998 | + private function doctypeNameState() { |
|
| 999 | + /* Consume the next input character: */ |
|
| 1000 | + $this->char++; |
|
| 1001 | + $char = $this->char(); |
|
| 1002 | + |
|
| 1003 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1004 | + $this->state = 'AfterDoctypeName'; |
|
| 1005 | + |
|
| 1006 | + } elseif($char === '>') { |
|
| 1007 | + $this->emitToken($this->token); |
|
| 1008 | + $this->state = 'data'; |
|
| 1009 | + |
|
| 1010 | + } elseif(preg_match('/^[a-z]$/', $char)) { |
|
| 1011 | + $this->token['name'] .= strtoupper($char); |
|
| 1012 | + |
|
| 1013 | + } elseif($this->char === $this->EOF) { |
|
| 1014 | + $this->emitToken($this->token); |
|
| 1015 | + $this->char--; |
|
| 1016 | + $this->state = 'data'; |
|
| 1017 | + |
|
| 1018 | + } else { |
|
| 1019 | + $this->token['name'] .= $char; |
|
| 1020 | + } |
|
| 1021 | + |
|
| 1022 | + $this->token['error'] = ($this->token['name'] === 'HTML') |
|
| 1023 | + ? false |
|
| 1024 | + : true; |
|
| 1025 | + } |
|
| 1026 | + |
|
| 1027 | + private function afterDoctypeNameState() { |
|
| 1028 | + /* Consume the next input character: */ |
|
| 1029 | + $this->char++; |
|
| 1030 | + $char = $this->char(); |
|
| 1031 | + |
|
| 1032 | + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1033 | + // Stay in the after DOCTYPE name state. |
|
| 1034 | + |
|
| 1035 | + } elseif($char === '>') { |
|
| 1036 | + $this->emitToken($this->token); |
|
| 1037 | + $this->state = 'data'; |
|
| 1038 | + |
|
| 1039 | + } elseif($this->char === $this->EOF) { |
|
| 1040 | + $this->emitToken($this->token); |
|
| 1041 | + $this->char--; |
|
| 1042 | + $this->state = 'data'; |
|
| 1043 | + |
|
| 1044 | + } else { |
|
| 1045 | + $this->token['error'] = true; |
|
| 1046 | + $this->state = 'bogusDoctype'; |
|
| 1047 | + } |
|
| 1048 | + } |
|
| 1049 | + |
|
| 1050 | + private function bogusDoctypeState() { |
|
| 1051 | + /* Consume the next input character: */ |
|
| 1052 | + $this->char++; |
|
| 1053 | + $char = $this->char(); |
|
| 1054 | + |
|
| 1055 | + if($char === '>') { |
|
| 1056 | + $this->emitToken($this->token); |
|
| 1057 | + $this->state = 'data'; |
|
| 1058 | + |
|
| 1059 | + } elseif($this->char === $this->EOF) { |
|
| 1060 | + $this->emitToken($this->token); |
|
| 1061 | + $this->char--; |
|
| 1062 | + $this->state = 'data'; |
|
| 1063 | + |
|
| 1064 | + } else { |
|
| 1065 | + // Stay in the bogus DOCTYPE state. |
|
| 1066 | + } |
|
| 1067 | + } |
|
| 1068 | + |
|
| 1069 | + private function entity() { |
|
| 1070 | + $start = $this->char; |
|
| 1071 | + |
|
| 1072 | + // This section defines how to consume an entity. This definition is |
|
| 1073 | + // used when parsing entities in text and in attributes. |
|
| 1074 | + |
|
| 1075 | + // The behaviour depends on the identity of the next character (the |
|
| 1076 | + // one immediately after the U+0026 AMPERSAND character): |
|
| 1077 | + |
|
| 1078 | + switch($this->character($this->char + 1)) { |
|
| 1079 | + // U+0023 NUMBER SIGN (#) |
|
| 1080 | + case '#': |
|
| 1081 | + |
|
| 1082 | + // The behaviour further depends on the character after the |
|
| 1083 | + // U+0023 NUMBER SIGN: |
|
| 1084 | + switch($this->character($this->char + 1)) { |
|
| 1085 | + // U+0078 LATIN SMALL LETTER X |
|
| 1086 | + // U+0058 LATIN CAPITAL LETTER X |
|
| 1087 | + case 'x': |
|
| 1088 | + case 'X': |
|
| 1089 | + // Follow the steps below, but using the range of |
|
| 1090 | + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT |
|
| 1091 | + // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 |
|
| 1092 | + // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER |
|
| 1093 | + // A, through to U+0046 LATIN CAPITAL LETTER F (in other |
|
| 1094 | + // words, 0-9, A-F, a-f). |
|
| 1095 | + $char = 1; |
|
| 1096 | + $char_class = '0-9A-Fa-f'; |
|
| 1097 | + break; |
|
| 1098 | + |
|
| 1099 | + // Anything else |
|
| 1100 | + default: |
|
| 1101 | + // Follow the steps below, but using the range of |
|
| 1102 | + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT |
|
| 1103 | + // NINE (i.e. just 0-9). |
|
| 1104 | + $char = 0; |
|
| 1105 | + $char_class = '0-9'; |
|
| 1106 | + break; |
|
| 1107 | + } |
|
| 1108 | + |
|
| 1109 | + // Consume as many characters as match the range of characters |
|
| 1110 | + // given above. |
|
| 1111 | + $this->char++; |
|
| 1112 | + $e_name = $this->characters($char_class, $this->char + $char + 1); |
|
| 1113 | + $entity = $this->character($start, $this->char); |
|
| 1114 | + $cond = strlen($e_name) > 0; |
|
| 1115 | + |
|
| 1116 | + // The rest of the parsing happens below. |
|
| 1117 | + break; |
|
| 1118 | + |
|
| 1119 | + // Anything else |
|
| 1120 | + default: |
|
| 1121 | + // Consume the maximum number of characters possible, with the |
|
| 1122 | + // consumed characters case-sensitively matching one of the |
|
| 1123 | + // identifiers in the first column of the entities table. |
|
| 1124 | + $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); |
|
| 1125 | + $len = strlen($e_name); |
|
| 1126 | + |
|
| 1127 | + for($c = 1; $c <= $len; $c++) { |
|
| 1128 | + $id = substr($e_name, 0, $c); |
|
| 1129 | + $this->char++; |
|
| 1130 | + |
|
| 1131 | + if(in_array($id, $this->entities)) { |
|
| 1132 | + if ($e_name[$c-1] !== ';') { |
|
| 1133 | + if ($c < $len && $e_name[$c] == ';') { |
|
| 1134 | + $this->char++; // consume extra semicolon |
|
| 1135 | + } |
|
| 1136 | + } |
|
| 1137 | + $entity = $id; |
|
| 1138 | + break; |
|
| 1139 | + } |
|
| 1140 | + } |
|
| 1141 | + |
|
| 1142 | + $cond = isset($entity); |
|
| 1143 | + // The rest of the parsing happens below. |
|
| 1144 | + break; |
|
| 1145 | + } |
|
| 1146 | + |
|
| 1147 | + if(!$cond) { |
|
| 1148 | + // If no match can be made, then this is a parse error. No |
|
| 1149 | + // characters are consumed, and nothing is returned. |
|
| 1150 | + $this->char = $start; |
|
| 1151 | + return false; |
|
| 1152 | + } |
|
| 1153 | + |
|
| 1154 | + // Return a character token for the character corresponding to the |
|
| 1155 | + // entity name (as given by the second column of the entities table). |
|
| 1156 | + return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); |
|
| 1157 | + } |
|
| 1158 | + |
|
| 1159 | + private function emitToken($token) { |
|
| 1160 | + $emit = $this->tree->emitToken($token); |
|
| 1161 | + |
|
| 1162 | + if(is_int($emit)) { |
|
| 1163 | + $this->content_model = $emit; |
|
| 1164 | + |
|
| 1165 | + } elseif($token['type'] === self::ENDTAG) { |
|
| 1166 | + $this->content_model = self::PCDATA; |
|
| 1167 | + } |
|
| 1168 | + } |
|
| 1169 | + |
|
| 1170 | + private function EOF() { |
|
| 1171 | + $this->state = null; |
|
| 1172 | + $this->tree->emitToken(array( |
|
| 1173 | + 'type' => self::EOF |
|
| 1174 | + )); |
|
| 1175 | + } |
|
| 1176 | 1176 | } |
| 1177 | 1177 | |
| 1178 | 1178 | class HTML5TreeConstructer { |
| 1179 | - public $stack = array(); |
|
| 1180 | - |
|
| 1181 | - private $phase; |
|
| 1182 | - private $mode; |
|
| 1183 | - private $dom; |
|
| 1184 | - private $foster_parent = null; |
|
| 1185 | - private $a_formatting = array(); |
|
| 1186 | - |
|
| 1187 | - private $head_pointer = null; |
|
| 1188 | - private $form_pointer = null; |
|
| 1189 | - |
|
| 1190 | - private $scoping = array('button','caption','html','marquee','object','table','td','th'); |
|
| 1191 | - private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); |
|
| 1192 | - private $special = array('address','area','base','basefont','bgsound', |
|
| 1193 | - 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', |
|
| 1194 | - 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', |
|
| 1195 | - 'h6','head','hr','iframe','image','img','input','isindex','li','link', |
|
| 1196 | - 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', |
|
| 1197 | - 'option','p','param','plaintext','pre','script','select','spacer','style', |
|
| 1198 | - 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); |
|
| 1199 | - |
|
| 1200 | - // The different phases. |
|
| 1201 | - const INIT_PHASE = 0; |
|
| 1202 | - const ROOT_PHASE = 1; |
|
| 1203 | - const MAIN_PHASE = 2; |
|
| 1204 | - const END_PHASE = 3; |
|
| 1205 | - |
|
| 1206 | - // The different insertion modes for the main phase. |
|
| 1207 | - const BEFOR_HEAD = 0; |
|
| 1208 | - const IN_HEAD = 1; |
|
| 1209 | - const AFTER_HEAD = 2; |
|
| 1210 | - const IN_BODY = 3; |
|
| 1211 | - const IN_TABLE = 4; |
|
| 1212 | - const IN_CAPTION = 5; |
|
| 1213 | - const IN_CGROUP = 6; |
|
| 1214 | - const IN_TBODY = 7; |
|
| 1215 | - const IN_ROW = 8; |
|
| 1216 | - const IN_CELL = 9; |
|
| 1217 | - const IN_SELECT = 10; |
|
| 1218 | - const AFTER_BODY = 11; |
|
| 1219 | - const IN_FRAME = 12; |
|
| 1220 | - const AFTR_FRAME = 13; |
|
| 1221 | - |
|
| 1222 | - // The different types of elements. |
|
| 1223 | - const SPECIAL = 0; |
|
| 1224 | - const SCOPING = 1; |
|
| 1225 | - const FORMATTING = 2; |
|
| 1226 | - const PHRASING = 3; |
|
| 1227 | - |
|
| 1228 | - const MARKER = 0; |
|
| 1229 | - |
|
| 1230 | - public function __construct() { |
|
| 1231 | - $this->phase = self::INIT_PHASE; |
|
| 1232 | - $this->mode = self::BEFOR_HEAD; |
|
| 1233 | - $this->dom = new DOMDocument; |
|
| 1234 | - |
|
| 1235 | - $this->dom->encoding = 'UTF-8'; |
|
| 1236 | - $this->dom->preserveWhiteSpace = true; |
|
| 1237 | - $this->dom->substituteEntities = true; |
|
| 1238 | - $this->dom->strictErrorChecking = false; |
|
| 1239 | - } |
|
| 1240 | - |
|
| 1241 | - // Process tag tokens |
|
| 1242 | - public function emitToken($token) { |
|
| 1243 | - switch($this->phase) { |
|
| 1244 | - case self::INIT_PHASE: return $this->initPhase($token); break; |
|
| 1245 | - case self::ROOT_PHASE: return $this->rootElementPhase($token); break; |
|
| 1246 | - case self::MAIN_PHASE: return $this->mainPhase($token); break; |
|
| 1247 | - case self::END_PHASE : return $this->trailingEndPhase($token); break; |
|
| 1248 | - } |
|
| 1249 | - } |
|
| 1250 | - |
|
| 1251 | - private function initPhase($token) { |
|
| 1252 | - /* Initially, the tree construction stage must handle each token |
|
| 1179 | + public $stack = array(); |
|
| 1180 | + |
|
| 1181 | + private $phase; |
|
| 1182 | + private $mode; |
|
| 1183 | + private $dom; |
|
| 1184 | + private $foster_parent = null; |
|
| 1185 | + private $a_formatting = array(); |
|
| 1186 | + |
|
| 1187 | + private $head_pointer = null; |
|
| 1188 | + private $form_pointer = null; |
|
| 1189 | + |
|
| 1190 | + private $scoping = array('button','caption','html','marquee','object','table','td','th'); |
|
| 1191 | + private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); |
|
| 1192 | + private $special = array('address','area','base','basefont','bgsound', |
|
| 1193 | + 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', |
|
| 1194 | + 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', |
|
| 1195 | + 'h6','head','hr','iframe','image','img','input','isindex','li','link', |
|
| 1196 | + 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', |
|
| 1197 | + 'option','p','param','plaintext','pre','script','select','spacer','style', |
|
| 1198 | + 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); |
|
| 1199 | + |
|
| 1200 | + // The different phases. |
|
| 1201 | + const INIT_PHASE = 0; |
|
| 1202 | + const ROOT_PHASE = 1; |
|
| 1203 | + const MAIN_PHASE = 2; |
|
| 1204 | + const END_PHASE = 3; |
|
| 1205 | + |
|
| 1206 | + // The different insertion modes for the main phase. |
|
| 1207 | + const BEFOR_HEAD = 0; |
|
| 1208 | + const IN_HEAD = 1; |
|
| 1209 | + const AFTER_HEAD = 2; |
|
| 1210 | + const IN_BODY = 3; |
|
| 1211 | + const IN_TABLE = 4; |
|
| 1212 | + const IN_CAPTION = 5; |
|
| 1213 | + const IN_CGROUP = 6; |
|
| 1214 | + const IN_TBODY = 7; |
|
| 1215 | + const IN_ROW = 8; |
|
| 1216 | + const IN_CELL = 9; |
|
| 1217 | + const IN_SELECT = 10; |
|
| 1218 | + const AFTER_BODY = 11; |
|
| 1219 | + const IN_FRAME = 12; |
|
| 1220 | + const AFTR_FRAME = 13; |
|
| 1221 | + |
|
| 1222 | + // The different types of elements. |
|
| 1223 | + const SPECIAL = 0; |
|
| 1224 | + const SCOPING = 1; |
|
| 1225 | + const FORMATTING = 2; |
|
| 1226 | + const PHRASING = 3; |
|
| 1227 | + |
|
| 1228 | + const MARKER = 0; |
|
| 1229 | + |
|
| 1230 | + public function __construct() { |
|
| 1231 | + $this->phase = self::INIT_PHASE; |
|
| 1232 | + $this->mode = self::BEFOR_HEAD; |
|
| 1233 | + $this->dom = new DOMDocument; |
|
| 1234 | + |
|
| 1235 | + $this->dom->encoding = 'UTF-8'; |
|
| 1236 | + $this->dom->preserveWhiteSpace = true; |
|
| 1237 | + $this->dom->substituteEntities = true; |
|
| 1238 | + $this->dom->strictErrorChecking = false; |
|
| 1239 | + } |
|
| 1240 | + |
|
| 1241 | + // Process tag tokens |
|
| 1242 | + public function emitToken($token) { |
|
| 1243 | + switch($this->phase) { |
|
| 1244 | + case self::INIT_PHASE: return $this->initPhase($token); break; |
|
| 1245 | + case self::ROOT_PHASE: return $this->rootElementPhase($token); break; |
|
| 1246 | + case self::MAIN_PHASE: return $this->mainPhase($token); break; |
|
| 1247 | + case self::END_PHASE : return $this->trailingEndPhase($token); break; |
|
| 1248 | + } |
|
| 1249 | + } |
|
| 1250 | + |
|
| 1251 | + private function initPhase($token) { |
|
| 1252 | + /* Initially, the tree construction stage must handle each token |
|
| 1253 | 1253 | emitted from the tokenisation stage as follows: */ |
| 1254 | 1254 | |
| 1255 | - /* A DOCTYPE token that is marked as being in error |
|
| 1255 | + /* A DOCTYPE token that is marked as being in error |
|
| 1256 | 1256 | A comment token |
| 1257 | 1257 | A start tag token |
| 1258 | 1258 | An end tag token |
@@ -1260,575 +1260,575 @@ discard block |
||
| 1260 | 1260 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1261 | 1261 | or U+0020 SPACE |
| 1262 | 1262 | An end-of-file token */ |
| 1263 | - if((isset($token['error']) && $token['error']) || |
|
| 1264 | - $token['type'] === HTML5::COMMENT || |
|
| 1265 | - $token['type'] === HTML5::STARTTAG || |
|
| 1266 | - $token['type'] === HTML5::ENDTAG || |
|
| 1267 | - $token['type'] === HTML5::EOF || |
|
| 1268 | - ($token['type'] === HTML5::CHARACTR && isset($token['data']) && |
|
| 1269 | - !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { |
|
| 1270 | - /* This specification does not define how to handle this case. In |
|
| 1263 | + if((isset($token['error']) && $token['error']) || |
|
| 1264 | + $token['type'] === HTML5::COMMENT || |
|
| 1265 | + $token['type'] === HTML5::STARTTAG || |
|
| 1266 | + $token['type'] === HTML5::ENDTAG || |
|
| 1267 | + $token['type'] === HTML5::EOF || |
|
| 1268 | + ($token['type'] === HTML5::CHARACTR && isset($token['data']) && |
|
| 1269 | + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { |
|
| 1270 | + /* This specification does not define how to handle this case. In |
|
| 1271 | 1271 | particular, user agents may ignore the entirety of this specification |
| 1272 | 1272 | altogether for such documents, and instead invoke special parse modes |
| 1273 | 1273 | with a greater emphasis on backwards compatibility. */ |
| 1274 | 1274 | |
| 1275 | - $this->phase = self::ROOT_PHASE; |
|
| 1276 | - return $this->rootElementPhase($token); |
|
| 1275 | + $this->phase = self::ROOT_PHASE; |
|
| 1276 | + return $this->rootElementPhase($token); |
|
| 1277 | 1277 | |
| 1278 | - /* A DOCTYPE token marked as being correct */ |
|
| 1279 | - } elseif(isset($token['error']) && !$token['error']) { |
|
| 1280 | - /* Append a DocumentType node to the Document node, with the name |
|
| 1278 | + /* A DOCTYPE token marked as being correct */ |
|
| 1279 | + } elseif(isset($token['error']) && !$token['error']) { |
|
| 1280 | + /* Append a DocumentType node to the Document node, with the name |
|
| 1281 | 1281 | attribute set to the name given in the DOCTYPE token (which will be |
| 1282 | 1282 | "HTML"), and the other attributes specific to DocumentType objects |
| 1283 | 1283 | set to null, empty lists, or the empty string as appropriate. */ |
| 1284 | - $doctype = new DOMDocumentType(null, null, 'HTML'); |
|
| 1284 | + $doctype = new DOMDocumentType(null, null, 'HTML'); |
|
| 1285 | 1285 | |
| 1286 | - /* Then, switch to the root element phase of the tree construction |
|
| 1286 | + /* Then, switch to the root element phase of the tree construction |
|
| 1287 | 1287 | stage. */ |
| 1288 | - $this->phase = self::ROOT_PHASE; |
|
| 1288 | + $this->phase = self::ROOT_PHASE; |
|
| 1289 | 1289 | |
| 1290 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1290 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1291 | 1291 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1292 | 1292 | or U+0020 SPACE */ |
| 1293 | - } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', |
|
| 1294 | - $token['data'])) { |
|
| 1295 | - /* Append that character to the Document node. */ |
|
| 1296 | - $text = $this->dom->createTextNode($token['data']); |
|
| 1297 | - $this->dom->appendChild($text); |
|
| 1298 | - } |
|
| 1299 | - } |
|
| 1300 | - |
|
| 1301 | - private function rootElementPhase($token) { |
|
| 1302 | - /* After the initial phase, as each token is emitted from the tokenisation |
|
| 1293 | + } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', |
|
| 1294 | + $token['data'])) { |
|
| 1295 | + /* Append that character to the Document node. */ |
|
| 1296 | + $text = $this->dom->createTextNode($token['data']); |
|
| 1297 | + $this->dom->appendChild($text); |
|
| 1298 | + } |
|
| 1299 | + } |
|
| 1300 | + |
|
| 1301 | + private function rootElementPhase($token) { |
|
| 1302 | + /* After the initial phase, as each token is emitted from the tokenisation |
|
| 1303 | 1303 | stage, it must be processed as described in this section. */ |
| 1304 | 1304 | |
| 1305 | - /* A DOCTYPE token */ |
|
| 1306 | - if($token['type'] === HTML5::DOCTYPE) { |
|
| 1307 | - // Parse error. Ignore the token. |
|
| 1305 | + /* A DOCTYPE token */ |
|
| 1306 | + if($token['type'] === HTML5::DOCTYPE) { |
|
| 1307 | + // Parse error. Ignore the token. |
|
| 1308 | 1308 | |
| 1309 | - /* A comment token */ |
|
| 1310 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1311 | - /* Append a Comment node to the Document object with the data |
|
| 1309 | + /* A comment token */ |
|
| 1310 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1311 | + /* Append a Comment node to the Document object with the data |
|
| 1312 | 1312 | attribute set to the data given in the comment token. */ |
| 1313 | - $comment = $this->dom->createComment($token['data']); |
|
| 1314 | - $this->dom->appendChild($comment); |
|
| 1313 | + $comment = $this->dom->createComment($token['data']); |
|
| 1314 | + $this->dom->appendChild($comment); |
|
| 1315 | 1315 | |
| 1316 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1316 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1317 | 1317 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1318 | 1318 | or U+0020 SPACE */ |
| 1319 | - } elseif($token['type'] === HTML5::CHARACTR && |
|
| 1320 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 1321 | - /* Append that character to the Document node. */ |
|
| 1322 | - $text = $this->dom->createTextNode($token['data']); |
|
| 1323 | - $this->dom->appendChild($text); |
|
| 1319 | + } elseif($token['type'] === HTML5::CHARACTR && |
|
| 1320 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 1321 | + /* Append that character to the Document node. */ |
|
| 1322 | + $text = $this->dom->createTextNode($token['data']); |
|
| 1323 | + $this->dom->appendChild($text); |
|
| 1324 | 1324 | |
| 1325 | - /* A character token that is not one of U+0009 CHARACTER TABULATION, |
|
| 1325 | + /* A character token that is not one of U+0009 CHARACTER TABULATION, |
|
| 1326 | 1326 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED |
| 1327 | 1327 | (FF), or U+0020 SPACE |
| 1328 | 1328 | A start tag token |
| 1329 | 1329 | An end tag token |
| 1330 | 1330 | An end-of-file token */ |
| 1331 | - } elseif(($token['type'] === HTML5::CHARACTR && |
|
| 1332 | - !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || |
|
| 1333 | - $token['type'] === HTML5::STARTTAG || |
|
| 1334 | - $token['type'] === HTML5::ENDTAG || |
|
| 1335 | - $token['type'] === HTML5::EOF) { |
|
| 1336 | - /* Create an HTMLElement node with the tag name html, in the HTML |
|
| 1331 | + } elseif(($token['type'] === HTML5::CHARACTR && |
|
| 1332 | + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || |
|
| 1333 | + $token['type'] === HTML5::STARTTAG || |
|
| 1334 | + $token['type'] === HTML5::ENDTAG || |
|
| 1335 | + $token['type'] === HTML5::EOF) { |
|
| 1336 | + /* Create an HTMLElement node with the tag name html, in the HTML |
|
| 1337 | 1337 | namespace. Append it to the Document object. Switch to the main |
| 1338 | 1338 | phase and reprocess the current token. */ |
| 1339 | - $html = $this->dom->createElement('html'); |
|
| 1340 | - $this->dom->appendChild($html); |
|
| 1341 | - $this->stack[] = $html; |
|
| 1339 | + $html = $this->dom->createElement('html'); |
|
| 1340 | + $this->dom->appendChild($html); |
|
| 1341 | + $this->stack[] = $html; |
|
| 1342 | 1342 | |
| 1343 | - $this->phase = self::MAIN_PHASE; |
|
| 1344 | - return $this->mainPhase($token); |
|
| 1345 | - } |
|
| 1346 | - } |
|
| 1343 | + $this->phase = self::MAIN_PHASE; |
|
| 1344 | + return $this->mainPhase($token); |
|
| 1345 | + } |
|
| 1346 | + } |
|
| 1347 | 1347 | |
| 1348 | - private function mainPhase($token) { |
|
| 1349 | - /* Tokens in the main phase must be handled as follows: */ |
|
| 1348 | + private function mainPhase($token) { |
|
| 1349 | + /* Tokens in the main phase must be handled as follows: */ |
|
| 1350 | 1350 | |
| 1351 | - /* A DOCTYPE token */ |
|
| 1352 | - if($token['type'] === HTML5::DOCTYPE) { |
|
| 1353 | - // Parse error. Ignore the token. |
|
| 1351 | + /* A DOCTYPE token */ |
|
| 1352 | + if($token['type'] === HTML5::DOCTYPE) { |
|
| 1353 | + // Parse error. Ignore the token. |
|
| 1354 | 1354 | |
| 1355 | - /* A start tag token with the tag name "html" */ |
|
| 1356 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { |
|
| 1357 | - /* If this start tag token was not the first start tag token, then |
|
| 1355 | + /* A start tag token with the tag name "html" */ |
|
| 1356 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { |
|
| 1357 | + /* If this start tag token was not the first start tag token, then |
|
| 1358 | 1358 | it is a parse error. */ |
| 1359 | 1359 | |
| 1360 | - /* For each attribute on the token, check to see if the attribute |
|
| 1360 | + /* For each attribute on the token, check to see if the attribute |
|
| 1361 | 1361 | is already present on the top element of the stack of open elements. |
| 1362 | 1362 | If it is not, add the attribute and its corresponding value to that |
| 1363 | 1363 | element. */ |
| 1364 | - foreach($token['attr'] as $attr) { |
|
| 1365 | - if(!$this->stack[0]->hasAttribute($attr['name'])) { |
|
| 1366 | - $this->stack[0]->setAttribute($attr['name'], $attr['value']); |
|
| 1367 | - } |
|
| 1368 | - } |
|
| 1369 | - |
|
| 1370 | - /* An end-of-file token */ |
|
| 1371 | - } elseif($token['type'] === HTML5::EOF) { |
|
| 1372 | - /* Generate implied end tags. */ |
|
| 1373 | - $this->generateImpliedEndTags(); |
|
| 1374 | - |
|
| 1375 | - /* Anything else. */ |
|
| 1376 | - } else { |
|
| 1377 | - /* Depends on the insertion mode: */ |
|
| 1378 | - switch($this->mode) { |
|
| 1379 | - case self::BEFOR_HEAD: return $this->beforeHead($token); break; |
|
| 1380 | - case self::IN_HEAD: return $this->inHead($token); break; |
|
| 1381 | - case self::AFTER_HEAD: return $this->afterHead($token); break; |
|
| 1382 | - case self::IN_BODY: return $this->inBody($token); break; |
|
| 1383 | - case self::IN_TABLE: return $this->inTable($token); break; |
|
| 1384 | - case self::IN_CAPTION: return $this->inCaption($token); break; |
|
| 1385 | - case self::IN_CGROUP: return $this->inColumnGroup($token); break; |
|
| 1386 | - case self::IN_TBODY: return $this->inTableBody($token); break; |
|
| 1387 | - case self::IN_ROW: return $this->inRow($token); break; |
|
| 1388 | - case self::IN_CELL: return $this->inCell($token); break; |
|
| 1389 | - case self::IN_SELECT: return $this->inSelect($token); break; |
|
| 1390 | - case self::AFTER_BODY: return $this->afterBody($token); break; |
|
| 1391 | - case self::IN_FRAME: return $this->inFrameset($token); break; |
|
| 1392 | - case self::AFTR_FRAME: return $this->afterFrameset($token); break; |
|
| 1393 | - case self::END_PHASE: return $this->trailingEndPhase($token); break; |
|
| 1394 | - } |
|
| 1395 | - } |
|
| 1396 | - } |
|
| 1397 | - |
|
| 1398 | - private function beforeHead($token) { |
|
| 1399 | - /* Handle the token as follows: */ |
|
| 1400 | - |
|
| 1401 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1364 | + foreach($token['attr'] as $attr) { |
|
| 1365 | + if(!$this->stack[0]->hasAttribute($attr['name'])) { |
|
| 1366 | + $this->stack[0]->setAttribute($attr['name'], $attr['value']); |
|
| 1367 | + } |
|
| 1368 | + } |
|
| 1369 | + |
|
| 1370 | + /* An end-of-file token */ |
|
| 1371 | + } elseif($token['type'] === HTML5::EOF) { |
|
| 1372 | + /* Generate implied end tags. */ |
|
| 1373 | + $this->generateImpliedEndTags(); |
|
| 1374 | + |
|
| 1375 | + /* Anything else. */ |
|
| 1376 | + } else { |
|
| 1377 | + /* Depends on the insertion mode: */ |
|
| 1378 | + switch($this->mode) { |
|
| 1379 | + case self::BEFOR_HEAD: return $this->beforeHead($token); break; |
|
| 1380 | + case self::IN_HEAD: return $this->inHead($token); break; |
|
| 1381 | + case self::AFTER_HEAD: return $this->afterHead($token); break; |
|
| 1382 | + case self::IN_BODY: return $this->inBody($token); break; |
|
| 1383 | + case self::IN_TABLE: return $this->inTable($token); break; |
|
| 1384 | + case self::IN_CAPTION: return $this->inCaption($token); break; |
|
| 1385 | + case self::IN_CGROUP: return $this->inColumnGroup($token); break; |
|
| 1386 | + case self::IN_TBODY: return $this->inTableBody($token); break; |
|
| 1387 | + case self::IN_ROW: return $this->inRow($token); break; |
|
| 1388 | + case self::IN_CELL: return $this->inCell($token); break; |
|
| 1389 | + case self::IN_SELECT: return $this->inSelect($token); break; |
|
| 1390 | + case self::AFTER_BODY: return $this->afterBody($token); break; |
|
| 1391 | + case self::IN_FRAME: return $this->inFrameset($token); break; |
|
| 1392 | + case self::AFTR_FRAME: return $this->afterFrameset($token); break; |
|
| 1393 | + case self::END_PHASE: return $this->trailingEndPhase($token); break; |
|
| 1394 | + } |
|
| 1395 | + } |
|
| 1396 | + } |
|
| 1397 | + |
|
| 1398 | + private function beforeHead($token) { |
|
| 1399 | + /* Handle the token as follows: */ |
|
| 1400 | + |
|
| 1401 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1402 | 1402 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1403 | 1403 | or U+0020 SPACE */ |
| 1404 | - if($token['type'] === HTML5::CHARACTR && |
|
| 1405 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 1406 | - /* Append the character to the current node. */ |
|
| 1407 | - $this->insertText($token['data']); |
|
| 1408 | - |
|
| 1409 | - /* A comment token */ |
|
| 1410 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1411 | - /* Append a Comment node to the current node with the data attribute |
|
| 1404 | + if($token['type'] === HTML5::CHARACTR && |
|
| 1405 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 1406 | + /* Append the character to the current node. */ |
|
| 1407 | + $this->insertText($token['data']); |
|
| 1408 | + |
|
| 1409 | + /* A comment token */ |
|
| 1410 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1411 | + /* Append a Comment node to the current node with the data attribute |
|
| 1412 | 1412 | set to the data given in the comment token. */ |
| 1413 | - $this->insertComment($token['data']); |
|
| 1413 | + $this->insertComment($token['data']); |
|
| 1414 | 1414 | |
| 1415 | - /* A start tag token with the tag name "head" */ |
|
| 1416 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { |
|
| 1417 | - /* Create an element for the token, append the new element to the |
|
| 1415 | + /* A start tag token with the tag name "head" */ |
|
| 1416 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { |
|
| 1417 | + /* Create an element for the token, append the new element to the |
|
| 1418 | 1418 | current node and push it onto the stack of open elements. */ |
| 1419 | - $element = $this->insertElement($token); |
|
| 1419 | + $element = $this->insertElement($token); |
|
| 1420 | 1420 | |
| 1421 | - /* Set the head element pointer to this new element node. */ |
|
| 1422 | - $this->head_pointer = $element; |
|
| 1421 | + /* Set the head element pointer to this new element node. */ |
|
| 1422 | + $this->head_pointer = $element; |
|
| 1423 | 1423 | |
| 1424 | - /* Change the insertion mode to "in head". */ |
|
| 1425 | - $this->mode = self::IN_HEAD; |
|
| 1424 | + /* Change the insertion mode to "in head". */ |
|
| 1425 | + $this->mode = self::IN_HEAD; |
|
| 1426 | 1426 | |
| 1427 | - /* A start tag token whose tag name is one of: "base", "link", "meta", |
|
| 1427 | + /* A start tag token whose tag name is one of: "base", "link", "meta", |
|
| 1428 | 1428 | "script", "style", "title". Or an end tag with the tag name "html". |
| 1429 | 1429 | Or a character token that is not one of U+0009 CHARACTER TABULATION, |
| 1430 | 1430 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1431 | 1431 | or U+0020 SPACE. Or any other start tag token */ |
| 1432 | - } elseif($token['type'] === HTML5::STARTTAG || |
|
| 1433 | - ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || |
|
| 1434 | - ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', |
|
| 1435 | - $token['data']))) { |
|
| 1436 | - /* Act as if a start tag token with the tag name "head" and no |
|
| 1432 | + } elseif($token['type'] === HTML5::STARTTAG || |
|
| 1433 | + ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || |
|
| 1434 | + ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', |
|
| 1435 | + $token['data']))) { |
|
| 1436 | + /* Act as if a start tag token with the tag name "head" and no |
|
| 1437 | 1437 | attributes had been seen, then reprocess the current token. */ |
| 1438 | - $this->beforeHead(array( |
|
| 1439 | - 'name' => 'head', |
|
| 1440 | - 'type' => HTML5::STARTTAG, |
|
| 1441 | - 'attr' => array() |
|
| 1442 | - )); |
|
| 1438 | + $this->beforeHead(array( |
|
| 1439 | + 'name' => 'head', |
|
| 1440 | + 'type' => HTML5::STARTTAG, |
|
| 1441 | + 'attr' => array() |
|
| 1442 | + )); |
|
| 1443 | 1443 | |
| 1444 | - return $this->inHead($token); |
|
| 1444 | + return $this->inHead($token); |
|
| 1445 | 1445 | |
| 1446 | - /* Any other end tag */ |
|
| 1447 | - } elseif($token['type'] === HTML5::ENDTAG) { |
|
| 1448 | - /* Parse error. Ignore the token. */ |
|
| 1449 | - } |
|
| 1450 | - } |
|
| 1446 | + /* Any other end tag */ |
|
| 1447 | + } elseif($token['type'] === HTML5::ENDTAG) { |
|
| 1448 | + /* Parse error. Ignore the token. */ |
|
| 1449 | + } |
|
| 1450 | + } |
|
| 1451 | 1451 | |
| 1452 | - private function inHead($token) { |
|
| 1453 | - /* Handle the token as follows: */ |
|
| 1452 | + private function inHead($token) { |
|
| 1453 | + /* Handle the token as follows: */ |
|
| 1454 | 1454 | |
| 1455 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1455 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1456 | 1456 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1457 | 1457 | or U+0020 SPACE. |
| 1458 | 1458 | |
| 1459 | 1459 | THIS DIFFERS FROM THE SPEC: If the current node is either a title, style |
| 1460 | 1460 | or script element, append the character to the current node regardless |
| 1461 | 1461 | of its content. */ |
| 1462 | - if(($token['type'] === HTML5::CHARACTR && |
|
| 1463 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( |
|
| 1464 | - $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, |
|
| 1465 | - array('title', 'style', 'script')))) { |
|
| 1466 | - /* Append the character to the current node. */ |
|
| 1467 | - $this->insertText($token['data']); |
|
| 1468 | - |
|
| 1469 | - /* A comment token */ |
|
| 1470 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1471 | - /* Append a Comment node to the current node with the data attribute |
|
| 1462 | + if(($token['type'] === HTML5::CHARACTR && |
|
| 1463 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( |
|
| 1464 | + $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, |
|
| 1465 | + array('title', 'style', 'script')))) { |
|
| 1466 | + /* Append the character to the current node. */ |
|
| 1467 | + $this->insertText($token['data']); |
|
| 1468 | + |
|
| 1469 | + /* A comment token */ |
|
| 1470 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1471 | + /* Append a Comment node to the current node with the data attribute |
|
| 1472 | 1472 | set to the data given in the comment token. */ |
| 1473 | - $this->insertComment($token['data']); |
|
| 1473 | + $this->insertComment($token['data']); |
|
| 1474 | 1474 | |
| 1475 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 1476 | - in_array($token['name'], array('title', 'style', 'script'))) { |
|
| 1477 | - array_pop($this->stack); |
|
| 1478 | - return HTML5::PCDATA; |
|
| 1475 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 1476 | + in_array($token['name'], array('title', 'style', 'script'))) { |
|
| 1477 | + array_pop($this->stack); |
|
| 1478 | + return HTML5::PCDATA; |
|
| 1479 | 1479 | |
| 1480 | - /* A start tag with the tag name "title" */ |
|
| 1481 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { |
|
| 1482 | - /* Create an element for the token and append the new element to the |
|
| 1480 | + /* A start tag with the tag name "title" */ |
|
| 1481 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { |
|
| 1482 | + /* Create an element for the token and append the new element to the |
|
| 1483 | 1483 | node pointed to by the head element pointer, or, if that is null |
| 1484 | 1484 | (innerHTML case), to the current node. */ |
| 1485 | - if($this->head_pointer !== null) { |
|
| 1486 | - $element = $this->insertElement($token, false); |
|
| 1487 | - $this->head_pointer->appendChild($element); |
|
| 1485 | + if($this->head_pointer !== null) { |
|
| 1486 | + $element = $this->insertElement($token, false); |
|
| 1487 | + $this->head_pointer->appendChild($element); |
|
| 1488 | 1488 | |
| 1489 | - } else { |
|
| 1490 | - $element = $this->insertElement($token); |
|
| 1491 | - } |
|
| 1489 | + } else { |
|
| 1490 | + $element = $this->insertElement($token); |
|
| 1491 | + } |
|
| 1492 | 1492 | |
| 1493 | - /* Switch the tokeniser's content model flag to the RCDATA state. */ |
|
| 1494 | - return HTML5::RCDATA; |
|
| 1493 | + /* Switch the tokeniser's content model flag to the RCDATA state. */ |
|
| 1494 | + return HTML5::RCDATA; |
|
| 1495 | 1495 | |
| 1496 | - /* A start tag with the tag name "style" */ |
|
| 1497 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { |
|
| 1498 | - /* Create an element for the token and append the new element to the |
|
| 1496 | + /* A start tag with the tag name "style" */ |
|
| 1497 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { |
|
| 1498 | + /* Create an element for the token and append the new element to the |
|
| 1499 | 1499 | node pointed to by the head element pointer, or, if that is null |
| 1500 | 1500 | (innerHTML case), to the current node. */ |
| 1501 | - if($this->head_pointer !== null) { |
|
| 1502 | - $element = $this->insertElement($token, false); |
|
| 1503 | - $this->head_pointer->appendChild($element); |
|
| 1504 | - |
|
| 1505 | - } else { |
|
| 1506 | - $this->insertElement($token); |
|
| 1507 | - } |
|
| 1508 | - |
|
| 1509 | - /* Switch the tokeniser's content model flag to the CDATA state. */ |
|
| 1510 | - return HTML5::CDATA; |
|
| 1511 | - |
|
| 1512 | - /* A start tag with the tag name "script" */ |
|
| 1513 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { |
|
| 1514 | - /* Create an element for the token. */ |
|
| 1515 | - $element = $this->insertElement($token, false); |
|
| 1516 | - $this->head_pointer->appendChild($element); |
|
| 1517 | - |
|
| 1518 | - /* Switch the tokeniser's content model flag to the CDATA state. */ |
|
| 1519 | - return HTML5::CDATA; |
|
| 1520 | - |
|
| 1521 | - /* A start tag with the tag name "base", "link", or "meta" */ |
|
| 1522 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1523 | - array('base', 'link', 'meta'))) { |
|
| 1524 | - /* Create an element for the token and append the new element to the |
|
| 1501 | + if($this->head_pointer !== null) { |
|
| 1502 | + $element = $this->insertElement($token, false); |
|
| 1503 | + $this->head_pointer->appendChild($element); |
|
| 1504 | + |
|
| 1505 | + } else { |
|
| 1506 | + $this->insertElement($token); |
|
| 1507 | + } |
|
| 1508 | + |
|
| 1509 | + /* Switch the tokeniser's content model flag to the CDATA state. */ |
|
| 1510 | + return HTML5::CDATA; |
|
| 1511 | + |
|
| 1512 | + /* A start tag with the tag name "script" */ |
|
| 1513 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { |
|
| 1514 | + /* Create an element for the token. */ |
|
| 1515 | + $element = $this->insertElement($token, false); |
|
| 1516 | + $this->head_pointer->appendChild($element); |
|
| 1517 | + |
|
| 1518 | + /* Switch the tokeniser's content model flag to the CDATA state. */ |
|
| 1519 | + return HTML5::CDATA; |
|
| 1520 | + |
|
| 1521 | + /* A start tag with the tag name "base", "link", or "meta" */ |
|
| 1522 | + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1523 | + array('base', 'link', 'meta'))) { |
|
| 1524 | + /* Create an element for the token and append the new element to the |
|
| 1525 | 1525 | node pointed to by the head element pointer, or, if that is null |
| 1526 | 1526 | (innerHTML case), to the current node. */ |
| 1527 | - if($this->head_pointer !== null) { |
|
| 1528 | - $element = $this->insertElement($token, false); |
|
| 1529 | - $this->head_pointer->appendChild($element); |
|
| 1530 | - array_pop($this->stack); |
|
| 1531 | - |
|
| 1532 | - } else { |
|
| 1533 | - $this->insertElement($token); |
|
| 1534 | - } |
|
| 1535 | - |
|
| 1536 | - /* An end tag with the tag name "head" */ |
|
| 1537 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { |
|
| 1538 | - /* If the current node is a head element, pop the current node off |
|
| 1527 | + if($this->head_pointer !== null) { |
|
| 1528 | + $element = $this->insertElement($token, false); |
|
| 1529 | + $this->head_pointer->appendChild($element); |
|
| 1530 | + array_pop($this->stack); |
|
| 1531 | + |
|
| 1532 | + } else { |
|
| 1533 | + $this->insertElement($token); |
|
| 1534 | + } |
|
| 1535 | + |
|
| 1536 | + /* An end tag with the tag name "head" */ |
|
| 1537 | + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { |
|
| 1538 | + /* If the current node is a head element, pop the current node off |
|
| 1539 | 1539 | the stack of open elements. */ |
| 1540 | - if($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1541 | - array_pop($this->stack); |
|
| 1540 | + if($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1541 | + array_pop($this->stack); |
|
| 1542 | 1542 | |
| 1543 | - /* Otherwise, this is a parse error. */ |
|
| 1544 | - } else { |
|
| 1545 | - // k |
|
| 1546 | - } |
|
| 1543 | + /* Otherwise, this is a parse error. */ |
|
| 1544 | + } else { |
|
| 1545 | + // k |
|
| 1546 | + } |
|
| 1547 | 1547 | |
| 1548 | - /* Change the insertion mode to "after head". */ |
|
| 1549 | - $this->mode = self::AFTER_HEAD; |
|
| 1548 | + /* Change the insertion mode to "after head". */ |
|
| 1549 | + $this->mode = self::AFTER_HEAD; |
|
| 1550 | 1550 | |
| 1551 | - /* A start tag with the tag name "head" or an end tag except "html". */ |
|
| 1552 | - } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || |
|
| 1553 | - ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { |
|
| 1554 | - // Parse error. Ignore the token. |
|
| 1551 | + /* A start tag with the tag name "head" or an end tag except "html". */ |
|
| 1552 | + } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || |
|
| 1553 | + ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { |
|
| 1554 | + // Parse error. Ignore the token. |
|
| 1555 | 1555 | |
| 1556 | - /* Anything else */ |
|
| 1557 | - } else { |
|
| 1558 | - /* If the current node is a head element, act as if an end tag |
|
| 1556 | + /* Anything else */ |
|
| 1557 | + } else { |
|
| 1558 | + /* If the current node is a head element, act as if an end tag |
|
| 1559 | 1559 | token with the tag name "head" had been seen. */ |
| 1560 | - if($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1561 | - $this->inHead(array( |
|
| 1562 | - 'name' => 'head', |
|
| 1563 | - 'type' => HTML5::ENDTAG |
|
| 1564 | - )); |
|
| 1565 | - |
|
| 1566 | - /* Otherwise, change the insertion mode to "after head". */ |
|
| 1567 | - } else { |
|
| 1568 | - $this->mode = self::AFTER_HEAD; |
|
| 1569 | - } |
|
| 1570 | - |
|
| 1571 | - /* Then, reprocess the current token. */ |
|
| 1572 | - return $this->afterHead($token); |
|
| 1573 | - } |
|
| 1574 | - } |
|
| 1575 | - |
|
| 1576 | - private function afterHead($token) { |
|
| 1577 | - /* Handle the token as follows: */ |
|
| 1578 | - |
|
| 1579 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1560 | + if($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1561 | + $this->inHead(array( |
|
| 1562 | + 'name' => 'head', |
|
| 1563 | + 'type' => HTML5::ENDTAG |
|
| 1564 | + )); |
|
| 1565 | + |
|
| 1566 | + /* Otherwise, change the insertion mode to "after head". */ |
|
| 1567 | + } else { |
|
| 1568 | + $this->mode = self::AFTER_HEAD; |
|
| 1569 | + } |
|
| 1570 | + |
|
| 1571 | + /* Then, reprocess the current token. */ |
|
| 1572 | + return $this->afterHead($token); |
|
| 1573 | + } |
|
| 1574 | + } |
|
| 1575 | + |
|
| 1576 | + private function afterHead($token) { |
|
| 1577 | + /* Handle the token as follows: */ |
|
| 1578 | + |
|
| 1579 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 1580 | 1580 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1581 | 1581 | or U+0020 SPACE */ |
| 1582 | - if($token['type'] === HTML5::CHARACTR && |
|
| 1583 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 1584 | - /* Append the character to the current node. */ |
|
| 1585 | - $this->insertText($token['data']); |
|
| 1586 | - |
|
| 1587 | - /* A comment token */ |
|
| 1588 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1589 | - /* Append a Comment node to the current node with the data attribute |
|
| 1582 | + if($token['type'] === HTML5::CHARACTR && |
|
| 1583 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 1584 | + /* Append the character to the current node. */ |
|
| 1585 | + $this->insertText($token['data']); |
|
| 1586 | + |
|
| 1587 | + /* A comment token */ |
|
| 1588 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1589 | + /* Append a Comment node to the current node with the data attribute |
|
| 1590 | 1590 | set to the data given in the comment token. */ |
| 1591 | - $this->insertComment($token['data']); |
|
| 1591 | + $this->insertComment($token['data']); |
|
| 1592 | 1592 | |
| 1593 | - /* A start tag token with the tag name "body" */ |
|
| 1594 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { |
|
| 1595 | - /* Insert a body element for the token. */ |
|
| 1596 | - $this->insertElement($token); |
|
| 1593 | + /* A start tag token with the tag name "body" */ |
|
| 1594 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { |
|
| 1595 | + /* Insert a body element for the token. */ |
|
| 1596 | + $this->insertElement($token); |
|
| 1597 | 1597 | |
| 1598 | - /* Change the insertion mode to "in body". */ |
|
| 1599 | - $this->mode = self::IN_BODY; |
|
| 1598 | + /* Change the insertion mode to "in body". */ |
|
| 1599 | + $this->mode = self::IN_BODY; |
|
| 1600 | 1600 | |
| 1601 | - /* A start tag token with the tag name "frameset" */ |
|
| 1602 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { |
|
| 1603 | - /* Insert a frameset element for the token. */ |
|
| 1604 | - $this->insertElement($token); |
|
| 1601 | + /* A start tag token with the tag name "frameset" */ |
|
| 1602 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { |
|
| 1603 | + /* Insert a frameset element for the token. */ |
|
| 1604 | + $this->insertElement($token); |
|
| 1605 | 1605 | |
| 1606 | - /* Change the insertion mode to "in frameset". */ |
|
| 1607 | - $this->mode = self::IN_FRAME; |
|
| 1606 | + /* Change the insertion mode to "in frameset". */ |
|
| 1607 | + $this->mode = self::IN_FRAME; |
|
| 1608 | 1608 | |
| 1609 | - /* A start tag token whose tag name is one of: "base", "link", "meta", |
|
| 1609 | + /* A start tag token whose tag name is one of: "base", "link", "meta", |
|
| 1610 | 1610 | "script", "style", "title" */ |
| 1611 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1612 | - array('base', 'link', 'meta', 'script', 'style', 'title'))) { |
|
| 1613 | - /* Parse error. Switch the insertion mode back to "in head" and |
|
| 1611 | + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1612 | + array('base', 'link', 'meta', 'script', 'style', 'title'))) { |
|
| 1613 | + /* Parse error. Switch the insertion mode back to "in head" and |
|
| 1614 | 1614 | reprocess the token. */ |
| 1615 | - $this->mode = self::IN_HEAD; |
|
| 1616 | - return $this->inHead($token); |
|
| 1615 | + $this->mode = self::IN_HEAD; |
|
| 1616 | + return $this->inHead($token); |
|
| 1617 | 1617 | |
| 1618 | - /* Anything else */ |
|
| 1619 | - } else { |
|
| 1620 | - /* Act as if a start tag token with the tag name "body" and no |
|
| 1618 | + /* Anything else */ |
|
| 1619 | + } else { |
|
| 1620 | + /* Act as if a start tag token with the tag name "body" and no |
|
| 1621 | 1621 | attributes had been seen, and then reprocess the current token. */ |
| 1622 | - $this->afterHead(array( |
|
| 1623 | - 'name' => 'body', |
|
| 1624 | - 'type' => HTML5::STARTTAG, |
|
| 1625 | - 'attr' => array() |
|
| 1626 | - )); |
|
| 1627 | - |
|
| 1628 | - return $this->inBody($token); |
|
| 1629 | - } |
|
| 1630 | - } |
|
| 1631 | - |
|
| 1632 | - private function inBody($token) { |
|
| 1633 | - /* Handle the token as follows: */ |
|
| 1634 | - |
|
| 1635 | - switch($token['type']) { |
|
| 1636 | - /* A character token */ |
|
| 1637 | - case HTML5::CHARACTR: |
|
| 1638 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1639 | - $this->reconstructActiveFormattingElements(); |
|
| 1640 | - |
|
| 1641 | - /* Append the token's character to the current node. */ |
|
| 1642 | - $this->insertText($token['data']); |
|
| 1643 | - break; |
|
| 1644 | - |
|
| 1645 | - /* A comment token */ |
|
| 1646 | - case HTML5::COMMENT: |
|
| 1647 | - /* Append a Comment node to the current node with the data |
|
| 1622 | + $this->afterHead(array( |
|
| 1623 | + 'name' => 'body', |
|
| 1624 | + 'type' => HTML5::STARTTAG, |
|
| 1625 | + 'attr' => array() |
|
| 1626 | + )); |
|
| 1627 | + |
|
| 1628 | + return $this->inBody($token); |
|
| 1629 | + } |
|
| 1630 | + } |
|
| 1631 | + |
|
| 1632 | + private function inBody($token) { |
|
| 1633 | + /* Handle the token as follows: */ |
|
| 1634 | + |
|
| 1635 | + switch($token['type']) { |
|
| 1636 | + /* A character token */ |
|
| 1637 | + case HTML5::CHARACTR: |
|
| 1638 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1639 | + $this->reconstructActiveFormattingElements(); |
|
| 1640 | + |
|
| 1641 | + /* Append the token's character to the current node. */ |
|
| 1642 | + $this->insertText($token['data']); |
|
| 1643 | + break; |
|
| 1644 | + |
|
| 1645 | + /* A comment token */ |
|
| 1646 | + case HTML5::COMMENT: |
|
| 1647 | + /* Append a Comment node to the current node with the data |
|
| 1648 | 1648 | attribute set to the data given in the comment token. */ |
| 1649 | - $this->insertComment($token['data']); |
|
| 1650 | - break; |
|
| 1649 | + $this->insertComment($token['data']); |
|
| 1650 | + break; |
|
| 1651 | 1651 | |
| 1652 | - case HTML5::STARTTAG: |
|
| 1653 | - switch($token['name']) { |
|
| 1654 | - /* A start tag token whose tag name is one of: "script", |
|
| 1652 | + case HTML5::STARTTAG: |
|
| 1653 | + switch($token['name']) { |
|
| 1654 | + /* A start tag token whose tag name is one of: "script", |
|
| 1655 | 1655 | "style" */ |
| 1656 | - case 'script': case 'style': |
|
| 1657 | - /* Process the token as if the insertion mode had been "in |
|
| 1656 | + case 'script': case 'style': |
|
| 1657 | + /* Process the token as if the insertion mode had been "in |
|
| 1658 | 1658 | head". */ |
| 1659 | - return $this->inHead($token); |
|
| 1660 | - break; |
|
| 1659 | + return $this->inHead($token); |
|
| 1660 | + break; |
|
| 1661 | 1661 | |
| 1662 | - /* A start tag token whose tag name is one of: "base", "link", |
|
| 1662 | + /* A start tag token whose tag name is one of: "base", "link", |
|
| 1663 | 1663 | "meta", "title" */ |
| 1664 | - case 'base': case 'link': case 'meta': case 'title': |
|
| 1665 | - /* Parse error. Process the token as if the insertion mode |
|
| 1664 | + case 'base': case 'link': case 'meta': case 'title': |
|
| 1665 | + /* Parse error. Process the token as if the insertion mode |
|
| 1666 | 1666 | had been "in head". */ |
| 1667 | - return $this->inHead($token); |
|
| 1668 | - break; |
|
| 1667 | + return $this->inHead($token); |
|
| 1668 | + break; |
|
| 1669 | 1669 | |
| 1670 | - /* A start tag token with the tag name "body" */ |
|
| 1671 | - case 'body': |
|
| 1672 | - /* Parse error. If the second element on the stack of open |
|
| 1670 | + /* A start tag token with the tag name "body" */ |
|
| 1671 | + case 'body': |
|
| 1672 | + /* Parse error. If the second element on the stack of open |
|
| 1673 | 1673 | elements is not a body element, or, if the stack of open |
| 1674 | 1674 | elements has only one node on it, then ignore the token. |
| 1675 | 1675 | (innerHTML case) */ |
| 1676 | - if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { |
|
| 1677 | - // Ignore |
|
| 1676 | + if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { |
|
| 1677 | + // Ignore |
|
| 1678 | 1678 | |
| 1679 | - /* Otherwise, for each attribute on the token, check to see |
|
| 1679 | + /* Otherwise, for each attribute on the token, check to see |
|
| 1680 | 1680 | if the attribute is already present on the body element (the |
| 1681 | 1681 | second element) on the stack of open elements. If it is not, |
| 1682 | 1682 | add the attribute and its corresponding value to that |
| 1683 | 1683 | element. */ |
| 1684 | - } else { |
|
| 1685 | - foreach($token['attr'] as $attr) { |
|
| 1686 | - if(!$this->stack[1]->hasAttribute($attr['name'])) { |
|
| 1687 | - $this->stack[1]->setAttribute($attr['name'], $attr['value']); |
|
| 1688 | - } |
|
| 1689 | - } |
|
| 1690 | - } |
|
| 1691 | - break; |
|
| 1692 | - |
|
| 1693 | - /* A start tag whose tag name is one of: "address", |
|
| 1684 | + } else { |
|
| 1685 | + foreach($token['attr'] as $attr) { |
|
| 1686 | + if(!$this->stack[1]->hasAttribute($attr['name'])) { |
|
| 1687 | + $this->stack[1]->setAttribute($attr['name'], $attr['value']); |
|
| 1688 | + } |
|
| 1689 | + } |
|
| 1690 | + } |
|
| 1691 | + break; |
|
| 1692 | + |
|
| 1693 | + /* A start tag whose tag name is one of: "address", |
|
| 1694 | 1694 | "blockquote", "center", "dir", "div", "dl", "fieldset", |
| 1695 | 1695 | "listing", "menu", "ol", "p", "ul" */ |
| 1696 | - case 'address': case 'blockquote': case 'center': case 'dir': |
|
| 1697 | - case 'div': case 'dl': case 'fieldset': case 'listing': |
|
| 1698 | - case 'menu': case 'ol': case 'p': case 'ul': |
|
| 1699 | - /* If the stack of open elements has a p element in scope, |
|
| 1696 | + case 'address': case 'blockquote': case 'center': case 'dir': |
|
| 1697 | + case 'div': case 'dl': case 'fieldset': case 'listing': |
|
| 1698 | + case 'menu': case 'ol': case 'p': case 'ul': |
|
| 1699 | + /* If the stack of open elements has a p element in scope, |
|
| 1700 | 1700 | then act as if an end tag with the tag name p had been |
| 1701 | 1701 | seen. */ |
| 1702 | - if($this->elementInScope('p')) { |
|
| 1703 | - $this->emitToken(array( |
|
| 1704 | - 'name' => 'p', |
|
| 1705 | - 'type' => HTML5::ENDTAG |
|
| 1706 | - )); |
|
| 1707 | - } |
|
| 1708 | - |
|
| 1709 | - /* Insert an HTML element for the token. */ |
|
| 1710 | - $this->insertElement($token); |
|
| 1711 | - break; |
|
| 1712 | - |
|
| 1713 | - /* A start tag whose tag name is "form" */ |
|
| 1714 | - case 'form': |
|
| 1715 | - /* If the form element pointer is not null, ignore the |
|
| 1702 | + if($this->elementInScope('p')) { |
|
| 1703 | + $this->emitToken(array( |
|
| 1704 | + 'name' => 'p', |
|
| 1705 | + 'type' => HTML5::ENDTAG |
|
| 1706 | + )); |
|
| 1707 | + } |
|
| 1708 | + |
|
| 1709 | + /* Insert an HTML element for the token. */ |
|
| 1710 | + $this->insertElement($token); |
|
| 1711 | + break; |
|
| 1712 | + |
|
| 1713 | + /* A start tag whose tag name is "form" */ |
|
| 1714 | + case 'form': |
|
| 1715 | + /* If the form element pointer is not null, ignore the |
|
| 1716 | 1716 | token with a parse error. */ |
| 1717 | - if($this->form_pointer !== null) { |
|
| 1718 | - // Ignore. |
|
| 1717 | + if($this->form_pointer !== null) { |
|
| 1718 | + // Ignore. |
|
| 1719 | 1719 | |
| 1720 | - /* Otherwise: */ |
|
| 1721 | - } else { |
|
| 1722 | - /* If the stack of open elements has a p element in |
|
| 1720 | + /* Otherwise: */ |
|
| 1721 | + } else { |
|
| 1722 | + /* If the stack of open elements has a p element in |
|
| 1723 | 1723 | scope, then act as if an end tag with the tag name p |
| 1724 | 1724 | had been seen. */ |
| 1725 | - if($this->elementInScope('p')) { |
|
| 1726 | - $this->emitToken(array( |
|
| 1727 | - 'name' => 'p', |
|
| 1728 | - 'type' => HTML5::ENDTAG |
|
| 1729 | - )); |
|
| 1730 | - } |
|
| 1731 | - |
|
| 1732 | - /* Insert an HTML element for the token, and set the |
|
| 1725 | + if($this->elementInScope('p')) { |
|
| 1726 | + $this->emitToken(array( |
|
| 1727 | + 'name' => 'p', |
|
| 1728 | + 'type' => HTML5::ENDTAG |
|
| 1729 | + )); |
|
| 1730 | + } |
|
| 1731 | + |
|
| 1732 | + /* Insert an HTML element for the token, and set the |
|
| 1733 | 1733 | form element pointer to point to the element created. */ |
| 1734 | - $element = $this->insertElement($token); |
|
| 1735 | - $this->form_pointer = $element; |
|
| 1736 | - } |
|
| 1737 | - break; |
|
| 1738 | - |
|
| 1739 | - /* A start tag whose tag name is "li", "dd" or "dt" */ |
|
| 1740 | - case 'li': case 'dd': case 'dt': |
|
| 1741 | - /* If the stack of open elements has a p element in scope, |
|
| 1734 | + $element = $this->insertElement($token); |
|
| 1735 | + $this->form_pointer = $element; |
|
| 1736 | + } |
|
| 1737 | + break; |
|
| 1738 | + |
|
| 1739 | + /* A start tag whose tag name is "li", "dd" or "dt" */ |
|
| 1740 | + case 'li': case 'dd': case 'dt': |
|
| 1741 | + /* If the stack of open elements has a p element in scope, |
|
| 1742 | 1742 | then act as if an end tag with the tag name p had been |
| 1743 | 1743 | seen. */ |
| 1744 | - if($this->elementInScope('p')) { |
|
| 1745 | - $this->emitToken(array( |
|
| 1746 | - 'name' => 'p', |
|
| 1747 | - 'type' => HTML5::ENDTAG |
|
| 1748 | - )); |
|
| 1749 | - } |
|
| 1744 | + if($this->elementInScope('p')) { |
|
| 1745 | + $this->emitToken(array( |
|
| 1746 | + 'name' => 'p', |
|
| 1747 | + 'type' => HTML5::ENDTAG |
|
| 1748 | + )); |
|
| 1749 | + } |
|
| 1750 | 1750 | |
| 1751 | - $stack_length = count($this->stack) - 1; |
|
| 1751 | + $stack_length = count($this->stack) - 1; |
|
| 1752 | 1752 | |
| 1753 | - for($n = $stack_length; 0 <= $n; $n--) { |
|
| 1754 | - /* 1. Initialise node to be the current node (the |
|
| 1753 | + for($n = $stack_length; 0 <= $n; $n--) { |
|
| 1754 | + /* 1. Initialise node to be the current node (the |
|
| 1755 | 1755 | bottommost node of the stack). */ |
| 1756 | - $stop = false; |
|
| 1757 | - $node = $this->stack[$n]; |
|
| 1758 | - $cat = $this->getElementCategory($node->tagName); |
|
| 1756 | + $stop = false; |
|
| 1757 | + $node = $this->stack[$n]; |
|
| 1758 | + $cat = $this->getElementCategory($node->tagName); |
|
| 1759 | 1759 | |
| 1760 | - /* 2. If node is an li, dd or dt element, then pop all |
|
| 1760 | + /* 2. If node is an li, dd or dt element, then pop all |
|
| 1761 | 1761 | the nodes from the current node up to node, including |
| 1762 | 1762 | node, then stop this algorithm. */ |
| 1763 | - if($token['name'] === $node->tagName || ($token['name'] !== 'li' |
|
| 1764 | - && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { |
|
| 1765 | - for($x = $stack_length; $x >= $n ; $x--) { |
|
| 1766 | - array_pop($this->stack); |
|
| 1767 | - } |
|
| 1763 | + if($token['name'] === $node->tagName || ($token['name'] !== 'li' |
|
| 1764 | + && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { |
|
| 1765 | + for($x = $stack_length; $x >= $n ; $x--) { |
|
| 1766 | + array_pop($this->stack); |
|
| 1767 | + } |
|
| 1768 | 1768 | |
| 1769 | - break; |
|
| 1770 | - } |
|
| 1769 | + break; |
|
| 1770 | + } |
|
| 1771 | 1771 | |
| 1772 | - /* 3. If node is not in the formatting category, and is |
|
| 1772 | + /* 3. If node is not in the formatting category, and is |
|
| 1773 | 1773 | not in the phrasing category, and is not an address or |
| 1774 | 1774 | div element, then stop this algorithm. */ |
| 1775 | - if($cat !== self::FORMATTING && $cat !== self::PHRASING && |
|
| 1776 | - $node->tagName !== 'address' && $node->tagName !== 'div') { |
|
| 1777 | - break; |
|
| 1778 | - } |
|
| 1779 | - } |
|
| 1775 | + if($cat !== self::FORMATTING && $cat !== self::PHRASING && |
|
| 1776 | + $node->tagName !== 'address' && $node->tagName !== 'div') { |
|
| 1777 | + break; |
|
| 1778 | + } |
|
| 1779 | + } |
|
| 1780 | 1780 | |
| 1781 | - /* Finally, insert an HTML element with the same tag |
|
| 1781 | + /* Finally, insert an HTML element with the same tag |
|
| 1782 | 1782 | name as the token's. */ |
| 1783 | - $this->insertElement($token); |
|
| 1784 | - break; |
|
| 1783 | + $this->insertElement($token); |
|
| 1784 | + break; |
|
| 1785 | 1785 | |
| 1786 | - /* A start tag token whose tag name is "plaintext" */ |
|
| 1787 | - case 'plaintext': |
|
| 1788 | - /* If the stack of open elements has a p element in scope, |
|
| 1786 | + /* A start tag token whose tag name is "plaintext" */ |
|
| 1787 | + case 'plaintext': |
|
| 1788 | + /* If the stack of open elements has a p element in scope, |
|
| 1789 | 1789 | then act as if an end tag with the tag name p had been |
| 1790 | 1790 | seen. */ |
| 1791 | - if($this->elementInScope('p')) { |
|
| 1792 | - $this->emitToken(array( |
|
| 1793 | - 'name' => 'p', |
|
| 1794 | - 'type' => HTML5::ENDTAG |
|
| 1795 | - )); |
|
| 1796 | - } |
|
| 1791 | + if($this->elementInScope('p')) { |
|
| 1792 | + $this->emitToken(array( |
|
| 1793 | + 'name' => 'p', |
|
| 1794 | + 'type' => HTML5::ENDTAG |
|
| 1795 | + )); |
|
| 1796 | + } |
|
| 1797 | 1797 | |
| 1798 | - /* Insert an HTML element for the token. */ |
|
| 1799 | - $this->insertElement($token); |
|
| 1798 | + /* Insert an HTML element for the token. */ |
|
| 1799 | + $this->insertElement($token); |
|
| 1800 | 1800 | |
| 1801 | - return HTML5::PLAINTEXT; |
|
| 1802 | - break; |
|
| 1801 | + return HTML5::PLAINTEXT; |
|
| 1802 | + break; |
|
| 1803 | 1803 | |
| 1804 | - /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", |
|
| 1804 | + /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", |
|
| 1805 | 1805 | "h5", "h6" */ |
| 1806 | - case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': |
|
| 1807 | - /* If the stack of open elements has a p element in scope, |
|
| 1806 | + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': |
|
| 1807 | + /* If the stack of open elements has a p element in scope, |
|
| 1808 | 1808 | then act as if an end tag with the tag name p had been seen. */ |
| 1809 | - if($this->elementInScope('p')) { |
|
| 1810 | - $this->emitToken(array( |
|
| 1811 | - 'name' => 'p', |
|
| 1812 | - 'type' => HTML5::ENDTAG |
|
| 1813 | - )); |
|
| 1814 | - } |
|
| 1815 | - |
|
| 1816 | - /* If the stack of open elements has in scope an element whose |
|
| 1809 | + if($this->elementInScope('p')) { |
|
| 1810 | + $this->emitToken(array( |
|
| 1811 | + 'name' => 'p', |
|
| 1812 | + 'type' => HTML5::ENDTAG |
|
| 1813 | + )); |
|
| 1814 | + } |
|
| 1815 | + |
|
| 1816 | + /* If the stack of open elements has in scope an element whose |
|
| 1817 | 1817 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then |
| 1818 | 1818 | this is a parse error; pop elements from the stack until an |
| 1819 | 1819 | element with one of those tag names has been popped from the |
| 1820 | 1820 | stack. */ |
| 1821 | - while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { |
|
| 1822 | - array_pop($this->stack); |
|
| 1823 | - } |
|
| 1821 | + while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { |
|
| 1822 | + array_pop($this->stack); |
|
| 1823 | + } |
|
| 1824 | 1824 | |
| 1825 | - /* Insert an HTML element for the token. */ |
|
| 1826 | - $this->insertElement($token); |
|
| 1827 | - break; |
|
| 1825 | + /* Insert an HTML element for the token. */ |
|
| 1826 | + $this->insertElement($token); |
|
| 1827 | + break; |
|
| 1828 | 1828 | |
| 1829 | - /* A start tag whose tag name is "a" */ |
|
| 1830 | - case 'a': |
|
| 1831 | - /* If the list of active formatting elements contains |
|
| 1829 | + /* A start tag whose tag name is "a" */ |
|
| 1830 | + case 'a': |
|
| 1831 | + /* If the list of active formatting elements contains |
|
| 1832 | 1832 | an element whose tag name is "a" between the end of the |
| 1833 | 1833 | list and the last marker on the list (or the start of |
| 1834 | 1834 | the list if there is no marker on the list), then this |
@@ -1837,940 +1837,940 @@ discard block |
||
| 1837 | 1837 | of active formatting elements and the stack of open |
| 1838 | 1838 | elements if the end tag didn't already remove it (it |
| 1839 | 1839 | might not have if the element is not in table scope). */ |
| 1840 | - $leng = count($this->a_formatting); |
|
| 1840 | + $leng = count($this->a_formatting); |
|
| 1841 | 1841 | |
| 1842 | - for($n = $leng - 1; $n >= 0; $n--) { |
|
| 1843 | - if($this->a_formatting[$n] === self::MARKER) { |
|
| 1844 | - break; |
|
| 1842 | + for($n = $leng - 1; $n >= 0; $n--) { |
|
| 1843 | + if($this->a_formatting[$n] === self::MARKER) { |
|
| 1844 | + break; |
|
| 1845 | 1845 | |
| 1846 | - } elseif($this->a_formatting[$n]->nodeName === 'a') { |
|
| 1847 | - $this->emitToken(array( |
|
| 1848 | - 'name' => 'a', |
|
| 1849 | - 'type' => HTML5::ENDTAG |
|
| 1850 | - )); |
|
| 1851 | - break; |
|
| 1852 | - } |
|
| 1853 | - } |
|
| 1846 | + } elseif($this->a_formatting[$n]->nodeName === 'a') { |
|
| 1847 | + $this->emitToken(array( |
|
| 1848 | + 'name' => 'a', |
|
| 1849 | + 'type' => HTML5::ENDTAG |
|
| 1850 | + )); |
|
| 1851 | + break; |
|
| 1852 | + } |
|
| 1853 | + } |
|
| 1854 | 1854 | |
| 1855 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1856 | - $this->reconstructActiveFormattingElements(); |
|
| 1855 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1856 | + $this->reconstructActiveFormattingElements(); |
|
| 1857 | 1857 | |
| 1858 | - /* Insert an HTML element for the token. */ |
|
| 1859 | - $el = $this->insertElement($token); |
|
| 1858 | + /* Insert an HTML element for the token. */ |
|
| 1859 | + $el = $this->insertElement($token); |
|
| 1860 | 1860 | |
| 1861 | - /* Add that element to the list of active formatting |
|
| 1861 | + /* Add that element to the list of active formatting |
|
| 1862 | 1862 | elements. */ |
| 1863 | - $this->a_formatting[] = $el; |
|
| 1864 | - break; |
|
| 1863 | + $this->a_formatting[] = $el; |
|
| 1864 | + break; |
|
| 1865 | 1865 | |
| 1866 | - /* A start tag whose tag name is one of: "b", "big", "em", "font", |
|
| 1866 | + /* A start tag whose tag name is one of: "b", "big", "em", "font", |
|
| 1867 | 1867 | "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ |
| 1868 | - case 'b': case 'big': case 'em': case 'font': case 'i': |
|
| 1869 | - case 'nobr': case 's': case 'small': case 'strike': |
|
| 1870 | - case 'strong': case 'tt': case 'u': |
|
| 1871 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1872 | - $this->reconstructActiveFormattingElements(); |
|
| 1868 | + case 'b': case 'big': case 'em': case 'font': case 'i': |
|
| 1869 | + case 'nobr': case 's': case 'small': case 'strike': |
|
| 1870 | + case 'strong': case 'tt': case 'u': |
|
| 1871 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1872 | + $this->reconstructActiveFormattingElements(); |
|
| 1873 | 1873 | |
| 1874 | - /* Insert an HTML element for the token. */ |
|
| 1875 | - $el = $this->insertElement($token); |
|
| 1874 | + /* Insert an HTML element for the token. */ |
|
| 1875 | + $el = $this->insertElement($token); |
|
| 1876 | 1876 | |
| 1877 | - /* Add that element to the list of active formatting |
|
| 1877 | + /* Add that element to the list of active formatting |
|
| 1878 | 1878 | elements. */ |
| 1879 | - $this->a_formatting[] = $el; |
|
| 1880 | - break; |
|
| 1879 | + $this->a_formatting[] = $el; |
|
| 1880 | + break; |
|
| 1881 | 1881 | |
| 1882 | - /* A start tag token whose tag name is "button" */ |
|
| 1883 | - case 'button': |
|
| 1884 | - /* If the stack of open elements has a button element in scope, |
|
| 1882 | + /* A start tag token whose tag name is "button" */ |
|
| 1883 | + case 'button': |
|
| 1884 | + /* If the stack of open elements has a button element in scope, |
|
| 1885 | 1885 | then this is a parse error; act as if an end tag with the tag |
| 1886 | 1886 | name "button" had been seen, then reprocess the token. (We don't |
| 1887 | 1887 | do that. Unnecessary.) */ |
| 1888 | - if($this->elementInScope('button')) { |
|
| 1889 | - $this->inBody(array( |
|
| 1890 | - 'name' => 'button', |
|
| 1891 | - 'type' => HTML5::ENDTAG |
|
| 1892 | - )); |
|
| 1893 | - } |
|
| 1888 | + if($this->elementInScope('button')) { |
|
| 1889 | + $this->inBody(array( |
|
| 1890 | + 'name' => 'button', |
|
| 1891 | + 'type' => HTML5::ENDTAG |
|
| 1892 | + )); |
|
| 1893 | + } |
|
| 1894 | 1894 | |
| 1895 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1896 | - $this->reconstructActiveFormattingElements(); |
|
| 1895 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1896 | + $this->reconstructActiveFormattingElements(); |
|
| 1897 | 1897 | |
| 1898 | - /* Insert an HTML element for the token. */ |
|
| 1899 | - $this->insertElement($token); |
|
| 1898 | + /* Insert an HTML element for the token. */ |
|
| 1899 | + $this->insertElement($token); |
|
| 1900 | 1900 | |
| 1901 | - /* Insert a marker at the end of the list of active |
|
| 1901 | + /* Insert a marker at the end of the list of active |
|
| 1902 | 1902 | formatting elements. */ |
| 1903 | - $this->a_formatting[] = self::MARKER; |
|
| 1904 | - break; |
|
| 1903 | + $this->a_formatting[] = self::MARKER; |
|
| 1904 | + break; |
|
| 1905 | 1905 | |
| 1906 | - /* A start tag token whose tag name is one of: "marquee", "object" */ |
|
| 1907 | - case 'marquee': case 'object': |
|
| 1908 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1909 | - $this->reconstructActiveFormattingElements(); |
|
| 1906 | + /* A start tag token whose tag name is one of: "marquee", "object" */ |
|
| 1907 | + case 'marquee': case 'object': |
|
| 1908 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1909 | + $this->reconstructActiveFormattingElements(); |
|
| 1910 | 1910 | |
| 1911 | - /* Insert an HTML element for the token. */ |
|
| 1912 | - $this->insertElement($token); |
|
| 1911 | + /* Insert an HTML element for the token. */ |
|
| 1912 | + $this->insertElement($token); |
|
| 1913 | 1913 | |
| 1914 | - /* Insert a marker at the end of the list of active |
|
| 1914 | + /* Insert a marker at the end of the list of active |
|
| 1915 | 1915 | formatting elements. */ |
| 1916 | - $this->a_formatting[] = self::MARKER; |
|
| 1917 | - break; |
|
| 1916 | + $this->a_formatting[] = self::MARKER; |
|
| 1917 | + break; |
|
| 1918 | 1918 | |
| 1919 | - /* A start tag token whose tag name is "xmp" */ |
|
| 1920 | - case 'xmp': |
|
| 1921 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1922 | - $this->reconstructActiveFormattingElements(); |
|
| 1919 | + /* A start tag token whose tag name is "xmp" */ |
|
| 1920 | + case 'xmp': |
|
| 1921 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1922 | + $this->reconstructActiveFormattingElements(); |
|
| 1923 | 1923 | |
| 1924 | - /* Insert an HTML element for the token. */ |
|
| 1925 | - $this->insertElement($token); |
|
| 1924 | + /* Insert an HTML element for the token. */ |
|
| 1925 | + $this->insertElement($token); |
|
| 1926 | 1926 | |
| 1927 | - /* Switch the content model flag to the CDATA state. */ |
|
| 1928 | - return HTML5::CDATA; |
|
| 1929 | - break; |
|
| 1927 | + /* Switch the content model flag to the CDATA state. */ |
|
| 1928 | + return HTML5::CDATA; |
|
| 1929 | + break; |
|
| 1930 | 1930 | |
| 1931 | - /* A start tag whose tag name is "table" */ |
|
| 1932 | - case 'table': |
|
| 1933 | - /* If the stack of open elements has a p element in scope, |
|
| 1931 | + /* A start tag whose tag name is "table" */ |
|
| 1932 | + case 'table': |
|
| 1933 | + /* If the stack of open elements has a p element in scope, |
|
| 1934 | 1934 | then act as if an end tag with the tag name p had been seen. */ |
| 1935 | - if($this->elementInScope('p')) { |
|
| 1936 | - $this->emitToken(array( |
|
| 1937 | - 'name' => 'p', |
|
| 1938 | - 'type' => HTML5::ENDTAG |
|
| 1939 | - )); |
|
| 1940 | - } |
|
| 1935 | + if($this->elementInScope('p')) { |
|
| 1936 | + $this->emitToken(array( |
|
| 1937 | + 'name' => 'p', |
|
| 1938 | + 'type' => HTML5::ENDTAG |
|
| 1939 | + )); |
|
| 1940 | + } |
|
| 1941 | 1941 | |
| 1942 | - /* Insert an HTML element for the token. */ |
|
| 1943 | - $this->insertElement($token); |
|
| 1942 | + /* Insert an HTML element for the token. */ |
|
| 1943 | + $this->insertElement($token); |
|
| 1944 | 1944 | |
| 1945 | - /* Change the insertion mode to "in table". */ |
|
| 1946 | - $this->mode = self::IN_TABLE; |
|
| 1947 | - break; |
|
| 1945 | + /* Change the insertion mode to "in table". */ |
|
| 1946 | + $this->mode = self::IN_TABLE; |
|
| 1947 | + break; |
|
| 1948 | 1948 | |
| 1949 | - /* A start tag whose tag name is one of: "area", "basefont", |
|
| 1949 | + /* A start tag whose tag name is one of: "area", "basefont", |
|
| 1950 | 1950 | "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ |
| 1951 | - case 'area': case 'basefont': case 'bgsound': case 'br': |
|
| 1952 | - case 'embed': case 'img': case 'param': case 'spacer': |
|
| 1953 | - case 'wbr': |
|
| 1954 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1955 | - $this->reconstructActiveFormattingElements(); |
|
| 1956 | - |
|
| 1957 | - /* Insert an HTML element for the token. */ |
|
| 1958 | - $this->insertElement($token); |
|
| 1959 | - |
|
| 1960 | - /* Immediately pop the current node off the stack of open elements. */ |
|
| 1961 | - array_pop($this->stack); |
|
| 1962 | - break; |
|
| 1963 | - |
|
| 1964 | - /* A start tag whose tag name is "hr" */ |
|
| 1965 | - case 'hr': |
|
| 1966 | - /* If the stack of open elements has a p element in scope, |
|
| 1951 | + case 'area': case 'basefont': case 'bgsound': case 'br': |
|
| 1952 | + case 'embed': case 'img': case 'param': case 'spacer': |
|
| 1953 | + case 'wbr': |
|
| 1954 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1955 | + $this->reconstructActiveFormattingElements(); |
|
| 1956 | + |
|
| 1957 | + /* Insert an HTML element for the token. */ |
|
| 1958 | + $this->insertElement($token); |
|
| 1959 | + |
|
| 1960 | + /* Immediately pop the current node off the stack of open elements. */ |
|
| 1961 | + array_pop($this->stack); |
|
| 1962 | + break; |
|
| 1963 | + |
|
| 1964 | + /* A start tag whose tag name is "hr" */ |
|
| 1965 | + case 'hr': |
|
| 1966 | + /* If the stack of open elements has a p element in scope, |
|
| 1967 | 1967 | then act as if an end tag with the tag name p had been seen. */ |
| 1968 | - if($this->elementInScope('p')) { |
|
| 1969 | - $this->emitToken(array( |
|
| 1970 | - 'name' => 'p', |
|
| 1971 | - 'type' => HTML5::ENDTAG |
|
| 1972 | - )); |
|
| 1973 | - } |
|
| 1974 | - |
|
| 1975 | - /* Insert an HTML element for the token. */ |
|
| 1976 | - $this->insertElement($token); |
|
| 1977 | - |
|
| 1978 | - /* Immediately pop the current node off the stack of open elements. */ |
|
| 1979 | - array_pop($this->stack); |
|
| 1980 | - break; |
|
| 1981 | - |
|
| 1982 | - /* A start tag whose tag name is "image" */ |
|
| 1983 | - case 'image': |
|
| 1984 | - /* Parse error. Change the token's tag name to "img" and |
|
| 1968 | + if($this->elementInScope('p')) { |
|
| 1969 | + $this->emitToken(array( |
|
| 1970 | + 'name' => 'p', |
|
| 1971 | + 'type' => HTML5::ENDTAG |
|
| 1972 | + )); |
|
| 1973 | + } |
|
| 1974 | + |
|
| 1975 | + /* Insert an HTML element for the token. */ |
|
| 1976 | + $this->insertElement($token); |
|
| 1977 | + |
|
| 1978 | + /* Immediately pop the current node off the stack of open elements. */ |
|
| 1979 | + array_pop($this->stack); |
|
| 1980 | + break; |
|
| 1981 | + |
|
| 1982 | + /* A start tag whose tag name is "image" */ |
|
| 1983 | + case 'image': |
|
| 1984 | + /* Parse error. Change the token's tag name to "img" and |
|
| 1985 | 1985 | reprocess it. (Don't ask.) */ |
| 1986 | - $token['name'] = 'img'; |
|
| 1987 | - return $this->inBody($token); |
|
| 1988 | - break; |
|
| 1986 | + $token['name'] = 'img'; |
|
| 1987 | + return $this->inBody($token); |
|
| 1988 | + break; |
|
| 1989 | 1989 | |
| 1990 | - /* A start tag whose tag name is "input" */ |
|
| 1991 | - case 'input': |
|
| 1992 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 1993 | - $this->reconstructActiveFormattingElements(); |
|
| 1990 | + /* A start tag whose tag name is "input" */ |
|
| 1991 | + case 'input': |
|
| 1992 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 1993 | + $this->reconstructActiveFormattingElements(); |
|
| 1994 | 1994 | |
| 1995 | - /* Insert an input element for the token. */ |
|
| 1996 | - $element = $this->insertElement($token, false); |
|
| 1995 | + /* Insert an input element for the token. */ |
|
| 1996 | + $element = $this->insertElement($token, false); |
|
| 1997 | 1997 | |
| 1998 | - /* If the form element pointer is not null, then associate the |
|
| 1998 | + /* If the form element pointer is not null, then associate the |
|
| 1999 | 1999 | input element with the form element pointed to by the form |
| 2000 | 2000 | element pointer. */ |
| 2001 | - $this->form_pointer !== null |
|
| 2002 | - ? $this->form_pointer->appendChild($element) |
|
| 2003 | - : end($this->stack)->appendChild($element); |
|
| 2001 | + $this->form_pointer !== null |
|
| 2002 | + ? $this->form_pointer->appendChild($element) |
|
| 2003 | + : end($this->stack)->appendChild($element); |
|
| 2004 | 2004 | |
| 2005 | - /* Pop that input element off the stack of open elements. */ |
|
| 2006 | - array_pop($this->stack); |
|
| 2007 | - break; |
|
| 2005 | + /* Pop that input element off the stack of open elements. */ |
|
| 2006 | + array_pop($this->stack); |
|
| 2007 | + break; |
|
| 2008 | 2008 | |
| 2009 | - /* A start tag whose tag name is "isindex" */ |
|
| 2010 | - case 'isindex': |
|
| 2011 | - /* Parse error. */ |
|
| 2012 | - // w/e |
|
| 2009 | + /* A start tag whose tag name is "isindex" */ |
|
| 2010 | + case 'isindex': |
|
| 2011 | + /* Parse error. */ |
|
| 2012 | + // w/e |
|
| 2013 | 2013 | |
| 2014 | - /* If the form element pointer is not null, |
|
| 2014 | + /* If the form element pointer is not null, |
|
| 2015 | 2015 | then ignore the token. */ |
| 2016 | - if($this->form_pointer === null) { |
|
| 2017 | - /* Act as if a start tag token with the tag name "form" had |
|
| 2016 | + if($this->form_pointer === null) { |
|
| 2017 | + /* Act as if a start tag token with the tag name "form" had |
|
| 2018 | 2018 | been seen. */ |
| 2019 | - $this->inBody(array( |
|
| 2020 | - 'name' => 'body', |
|
| 2021 | - 'type' => HTML5::STARTTAG, |
|
| 2022 | - 'attr' => array() |
|
| 2023 | - )); |
|
| 2019 | + $this->inBody(array( |
|
| 2020 | + 'name' => 'body', |
|
| 2021 | + 'type' => HTML5::STARTTAG, |
|
| 2022 | + 'attr' => array() |
|
| 2023 | + )); |
|
| 2024 | 2024 | |
| 2025 | - /* Act as if a start tag token with the tag name "hr" had |
|
| 2025 | + /* Act as if a start tag token with the tag name "hr" had |
|
| 2026 | 2026 | been seen. */ |
| 2027 | - $this->inBody(array( |
|
| 2028 | - 'name' => 'hr', |
|
| 2029 | - 'type' => HTML5::STARTTAG, |
|
| 2030 | - 'attr' => array() |
|
| 2031 | - )); |
|
| 2027 | + $this->inBody(array( |
|
| 2028 | + 'name' => 'hr', |
|
| 2029 | + 'type' => HTML5::STARTTAG, |
|
| 2030 | + 'attr' => array() |
|
| 2031 | + )); |
|
| 2032 | 2032 | |
| 2033 | - /* Act as if a start tag token with the tag name "p" had |
|
| 2033 | + /* Act as if a start tag token with the tag name "p" had |
|
| 2034 | 2034 | been seen. */ |
| 2035 | - $this->inBody(array( |
|
| 2036 | - 'name' => 'p', |
|
| 2037 | - 'type' => HTML5::STARTTAG, |
|
| 2038 | - 'attr' => array() |
|
| 2039 | - )); |
|
| 2035 | + $this->inBody(array( |
|
| 2036 | + 'name' => 'p', |
|
| 2037 | + 'type' => HTML5::STARTTAG, |
|
| 2038 | + 'attr' => array() |
|
| 2039 | + )); |
|
| 2040 | 2040 | |
| 2041 | - /* Act as if a start tag token with the tag name "label" |
|
| 2041 | + /* Act as if a start tag token with the tag name "label" |
|
| 2042 | 2042 | had been seen. */ |
| 2043 | - $this->inBody(array( |
|
| 2044 | - 'name' => 'label', |
|
| 2045 | - 'type' => HTML5::STARTTAG, |
|
| 2046 | - 'attr' => array() |
|
| 2047 | - )); |
|
| 2043 | + $this->inBody(array( |
|
| 2044 | + 'name' => 'label', |
|
| 2045 | + 'type' => HTML5::STARTTAG, |
|
| 2046 | + 'attr' => array() |
|
| 2047 | + )); |
|
| 2048 | 2048 | |
| 2049 | - /* Act as if a stream of character tokens had been seen. */ |
|
| 2050 | - $this->insertText('This is a searchable index. '. |
|
| 2051 | - 'Insert your search keywords here: '); |
|
| 2049 | + /* Act as if a stream of character tokens had been seen. */ |
|
| 2050 | + $this->insertText('This is a searchable index. '. |
|
| 2051 | + 'Insert your search keywords here: '); |
|
| 2052 | 2052 | |
| 2053 | - /* Act as if a start tag token with the tag name "input" |
|
| 2053 | + /* Act as if a start tag token with the tag name "input" |
|
| 2054 | 2054 | had been seen, with all the attributes from the "isindex" |
| 2055 | 2055 | token, except with the "name" attribute set to the value |
| 2056 | 2056 | "isindex" (ignoring any explicit "name" attribute). */ |
| 2057 | - $attr = $token['attr']; |
|
| 2058 | - $attr[] = array('name' => 'name', 'value' => 'isindex'); |
|
| 2057 | + $attr = $token['attr']; |
|
| 2058 | + $attr[] = array('name' => 'name', 'value' => 'isindex'); |
|
| 2059 | 2059 | |
| 2060 | - $this->inBody(array( |
|
| 2061 | - 'name' => 'input', |
|
| 2062 | - 'type' => HTML5::STARTTAG, |
|
| 2063 | - 'attr' => $attr |
|
| 2064 | - )); |
|
| 2060 | + $this->inBody(array( |
|
| 2061 | + 'name' => 'input', |
|
| 2062 | + 'type' => HTML5::STARTTAG, |
|
| 2063 | + 'attr' => $attr |
|
| 2064 | + )); |
|
| 2065 | 2065 | |
| 2066 | - /* Act as if a stream of character tokens had been seen |
|
| 2066 | + /* Act as if a stream of character tokens had been seen |
|
| 2067 | 2067 | (see below for what they should say). */ |
| 2068 | - $this->insertText('This is a searchable index. '. |
|
| 2069 | - 'Insert your search keywords here: '); |
|
| 2068 | + $this->insertText('This is a searchable index. '. |
|
| 2069 | + 'Insert your search keywords here: '); |
|
| 2070 | 2070 | |
| 2071 | - /* Act as if an end tag token with the tag name "label" |
|
| 2071 | + /* Act as if an end tag token with the tag name "label" |
|
| 2072 | 2072 | had been seen. */ |
| 2073 | - $this->inBody(array( |
|
| 2074 | - 'name' => 'label', |
|
| 2075 | - 'type' => HTML5::ENDTAG |
|
| 2076 | - )); |
|
| 2073 | + $this->inBody(array( |
|
| 2074 | + 'name' => 'label', |
|
| 2075 | + 'type' => HTML5::ENDTAG |
|
| 2076 | + )); |
|
| 2077 | 2077 | |
| 2078 | - /* Act as if an end tag token with the tag name "p" had |
|
| 2078 | + /* Act as if an end tag token with the tag name "p" had |
|
| 2079 | 2079 | been seen. */ |
| 2080 | - $this->inBody(array( |
|
| 2081 | - 'name' => 'p', |
|
| 2082 | - 'type' => HTML5::ENDTAG |
|
| 2083 | - )); |
|
| 2080 | + $this->inBody(array( |
|
| 2081 | + 'name' => 'p', |
|
| 2082 | + 'type' => HTML5::ENDTAG |
|
| 2083 | + )); |
|
| 2084 | 2084 | |
| 2085 | - /* Act as if a start tag token with the tag name "hr" had |
|
| 2085 | + /* Act as if a start tag token with the tag name "hr" had |
|
| 2086 | 2086 | been seen. */ |
| 2087 | - $this->inBody(array( |
|
| 2088 | - 'name' => 'hr', |
|
| 2089 | - 'type' => HTML5::ENDTAG |
|
| 2090 | - )); |
|
| 2087 | + $this->inBody(array( |
|
| 2088 | + 'name' => 'hr', |
|
| 2089 | + 'type' => HTML5::ENDTAG |
|
| 2090 | + )); |
|
| 2091 | 2091 | |
| 2092 | - /* Act as if an end tag token with the tag name "form" had |
|
| 2092 | + /* Act as if an end tag token with the tag name "form" had |
|
| 2093 | 2093 | been seen. */ |
| 2094 | - $this->inBody(array( |
|
| 2095 | - 'name' => 'form', |
|
| 2096 | - 'type' => HTML5::ENDTAG |
|
| 2097 | - )); |
|
| 2098 | - } |
|
| 2099 | - break; |
|
| 2100 | - |
|
| 2101 | - /* A start tag whose tag name is "textarea" */ |
|
| 2102 | - case 'textarea': |
|
| 2103 | - $this->insertElement($token); |
|
| 2104 | - |
|
| 2105 | - /* Switch the tokeniser's content model flag to the |
|
| 2094 | + $this->inBody(array( |
|
| 2095 | + 'name' => 'form', |
|
| 2096 | + 'type' => HTML5::ENDTAG |
|
| 2097 | + )); |
|
| 2098 | + } |
|
| 2099 | + break; |
|
| 2100 | + |
|
| 2101 | + /* A start tag whose tag name is "textarea" */ |
|
| 2102 | + case 'textarea': |
|
| 2103 | + $this->insertElement($token); |
|
| 2104 | + |
|
| 2105 | + /* Switch the tokeniser's content model flag to the |
|
| 2106 | 2106 | RCDATA state. */ |
| 2107 | - return HTML5::RCDATA; |
|
| 2108 | - break; |
|
| 2107 | + return HTML5::RCDATA; |
|
| 2108 | + break; |
|
| 2109 | 2109 | |
| 2110 | - /* A start tag whose tag name is one of: "iframe", "noembed", |
|
| 2110 | + /* A start tag whose tag name is one of: "iframe", "noembed", |
|
| 2111 | 2111 | "noframes" */ |
| 2112 | - case 'iframe': case 'noembed': case 'noframes': |
|
| 2113 | - $this->insertElement($token); |
|
| 2112 | + case 'iframe': case 'noembed': case 'noframes': |
|
| 2113 | + $this->insertElement($token); |
|
| 2114 | 2114 | |
| 2115 | - /* Switch the tokeniser's content model flag to the CDATA state. */ |
|
| 2116 | - return HTML5::CDATA; |
|
| 2117 | - break; |
|
| 2115 | + /* Switch the tokeniser's content model flag to the CDATA state. */ |
|
| 2116 | + return HTML5::CDATA; |
|
| 2117 | + break; |
|
| 2118 | 2118 | |
| 2119 | - /* A start tag whose tag name is "select" */ |
|
| 2120 | - case 'select': |
|
| 2121 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 2122 | - $this->reconstructActiveFormattingElements(); |
|
| 2119 | + /* A start tag whose tag name is "select" */ |
|
| 2120 | + case 'select': |
|
| 2121 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 2122 | + $this->reconstructActiveFormattingElements(); |
|
| 2123 | 2123 | |
| 2124 | - /* Insert an HTML element for the token. */ |
|
| 2125 | - $this->insertElement($token); |
|
| 2124 | + /* Insert an HTML element for the token. */ |
|
| 2125 | + $this->insertElement($token); |
|
| 2126 | 2126 | |
| 2127 | - /* Change the insertion mode to "in select". */ |
|
| 2128 | - $this->mode = self::IN_SELECT; |
|
| 2129 | - break; |
|
| 2127 | + /* Change the insertion mode to "in select". */ |
|
| 2128 | + $this->mode = self::IN_SELECT; |
|
| 2129 | + break; |
|
| 2130 | 2130 | |
| 2131 | - /* A start or end tag whose tag name is one of: "caption", "col", |
|
| 2131 | + /* A start or end tag whose tag name is one of: "caption", "col", |
|
| 2132 | 2132 | "colgroup", "frame", "frameset", "head", "option", "optgroup", |
| 2133 | 2133 | "tbody", "td", "tfoot", "th", "thead", "tr". */ |
| 2134 | - case 'caption': case 'col': case 'colgroup': case 'frame': |
|
| 2135 | - case 'frameset': case 'head': case 'option': case 'optgroup': |
|
| 2136 | - case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': |
|
| 2137 | - case 'tr': |
|
| 2138 | - // Parse error. Ignore the token. |
|
| 2139 | - break; |
|
| 2140 | - |
|
| 2141 | - /* A start or end tag whose tag name is one of: "event-source", |
|
| 2134 | + case 'caption': case 'col': case 'colgroup': case 'frame': |
|
| 2135 | + case 'frameset': case 'head': case 'option': case 'optgroup': |
|
| 2136 | + case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': |
|
| 2137 | + case 'tr': |
|
| 2138 | + // Parse error. Ignore the token. |
|
| 2139 | + break; |
|
| 2140 | + |
|
| 2141 | + /* A start or end tag whose tag name is one of: "event-source", |
|
| 2142 | 2142 | "section", "nav", "article", "aside", "header", "footer", |
| 2143 | 2143 | "datagrid", "command" */ |
| 2144 | - case 'event-source': case 'section': case 'nav': case 'article': |
|
| 2145 | - case 'aside': case 'header': case 'footer': case 'datagrid': |
|
| 2146 | - case 'command': |
|
| 2147 | - // Work in progress! |
|
| 2148 | - break; |
|
| 2149 | - |
|
| 2150 | - /* A start tag token not covered by the previous entries */ |
|
| 2151 | - default: |
|
| 2152 | - /* Reconstruct the active formatting elements, if any. */ |
|
| 2153 | - $this->reconstructActiveFormattingElements(); |
|
| 2154 | - |
|
| 2155 | - $this->insertElement($token, true, true); |
|
| 2156 | - break; |
|
| 2157 | - } |
|
| 2158 | - break; |
|
| 2159 | - |
|
| 2160 | - case HTML5::ENDTAG: |
|
| 2161 | - switch($token['name']) { |
|
| 2162 | - /* An end tag with the tag name "body" */ |
|
| 2163 | - case 'body': |
|
| 2164 | - /* If the second element in the stack of open elements is |
|
| 2144 | + case 'event-source': case 'section': case 'nav': case 'article': |
|
| 2145 | + case 'aside': case 'header': case 'footer': case 'datagrid': |
|
| 2146 | + case 'command': |
|
| 2147 | + // Work in progress! |
|
| 2148 | + break; |
|
| 2149 | + |
|
| 2150 | + /* A start tag token not covered by the previous entries */ |
|
| 2151 | + default: |
|
| 2152 | + /* Reconstruct the active formatting elements, if any. */ |
|
| 2153 | + $this->reconstructActiveFormattingElements(); |
|
| 2154 | + |
|
| 2155 | + $this->insertElement($token, true, true); |
|
| 2156 | + break; |
|
| 2157 | + } |
|
| 2158 | + break; |
|
| 2159 | + |
|
| 2160 | + case HTML5::ENDTAG: |
|
| 2161 | + switch($token['name']) { |
|
| 2162 | + /* An end tag with the tag name "body" */ |
|
| 2163 | + case 'body': |
|
| 2164 | + /* If the second element in the stack of open elements is |
|
| 2165 | 2165 | not a body element, this is a parse error. Ignore the token. |
| 2166 | 2166 | (innerHTML case) */ |
| 2167 | - if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { |
|
| 2168 | - // Ignore. |
|
| 2167 | + if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { |
|
| 2168 | + // Ignore. |
|
| 2169 | 2169 | |
| 2170 | - /* If the current node is not the body element, then this |
|
| 2170 | + /* If the current node is not the body element, then this |
|
| 2171 | 2171 | is a parse error. */ |
| 2172 | - } elseif(end($this->stack)->nodeName !== 'body') { |
|
| 2173 | - // Parse error. |
|
| 2174 | - } |
|
| 2172 | + } elseif(end($this->stack)->nodeName !== 'body') { |
|
| 2173 | + // Parse error. |
|
| 2174 | + } |
|
| 2175 | 2175 | |
| 2176 | - /* Change the insertion mode to "after body". */ |
|
| 2177 | - $this->mode = self::AFTER_BODY; |
|
| 2178 | - break; |
|
| 2176 | + /* Change the insertion mode to "after body". */ |
|
| 2177 | + $this->mode = self::AFTER_BODY; |
|
| 2178 | + break; |
|
| 2179 | 2179 | |
| 2180 | - /* An end tag with the tag name "html" */ |
|
| 2181 | - case 'html': |
|
| 2182 | - /* Act as if an end tag with tag name "body" had been seen, |
|
| 2180 | + /* An end tag with the tag name "html" */ |
|
| 2181 | + case 'html': |
|
| 2182 | + /* Act as if an end tag with tag name "body" had been seen, |
|
| 2183 | 2183 | then, if that token wasn't ignored, reprocess the current |
| 2184 | 2184 | token. */ |
| 2185 | - $this->inBody(array( |
|
| 2186 | - 'name' => 'body', |
|
| 2187 | - 'type' => HTML5::ENDTAG |
|
| 2188 | - )); |
|
| 2185 | + $this->inBody(array( |
|
| 2186 | + 'name' => 'body', |
|
| 2187 | + 'type' => HTML5::ENDTAG |
|
| 2188 | + )); |
|
| 2189 | 2189 | |
| 2190 | - return $this->afterBody($token); |
|
| 2191 | - break; |
|
| 2190 | + return $this->afterBody($token); |
|
| 2191 | + break; |
|
| 2192 | 2192 | |
| 2193 | - /* An end tag whose tag name is one of: "address", "blockquote", |
|
| 2193 | + /* An end tag whose tag name is one of: "address", "blockquote", |
|
| 2194 | 2194 | "center", "dir", "div", "dl", "fieldset", "listing", "menu", |
| 2195 | 2195 | "ol", "pre", "ul" */ |
| 2196 | - case 'address': case 'blockquote': case 'center': case 'dir': |
|
| 2197 | - case 'div': case 'dl': case 'fieldset': case 'listing': |
|
| 2198 | - case 'menu': case 'ol': case 'pre': case 'ul': |
|
| 2199 | - /* If the stack of open elements has an element in scope |
|
| 2196 | + case 'address': case 'blockquote': case 'center': case 'dir': |
|
| 2197 | + case 'div': case 'dl': case 'fieldset': case 'listing': |
|
| 2198 | + case 'menu': case 'ol': case 'pre': case 'ul': |
|
| 2199 | + /* If the stack of open elements has an element in scope |
|
| 2200 | 2200 | with the same tag name as that of the token, then generate |
| 2201 | 2201 | implied end tags. */ |
| 2202 | - if($this->elementInScope($token['name'])) { |
|
| 2203 | - $this->generateImpliedEndTags(); |
|
| 2202 | + if($this->elementInScope($token['name'])) { |
|
| 2203 | + $this->generateImpliedEndTags(); |
|
| 2204 | 2204 | |
| 2205 | - /* Now, if the current node is not an element with |
|
| 2205 | + /* Now, if the current node is not an element with |
|
| 2206 | 2206 | the same tag name as that of the token, then this |
| 2207 | 2207 | is a parse error. */ |
| 2208 | - // w/e |
|
| 2208 | + // w/e |
|
| 2209 | 2209 | |
| 2210 | - /* If the stack of open elements has an element in |
|
| 2210 | + /* If the stack of open elements has an element in |
|
| 2211 | 2211 | scope with the same tag name as that of the token, |
| 2212 | 2212 | then pop elements from this stack until an element |
| 2213 | 2213 | with that tag name has been popped from the stack. */ |
| 2214 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2215 | - if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2216 | - $n = -1; |
|
| 2217 | - } |
|
| 2218 | - |
|
| 2219 | - array_pop($this->stack); |
|
| 2220 | - } |
|
| 2221 | - } |
|
| 2222 | - break; |
|
| 2223 | - |
|
| 2224 | - /* An end tag whose tag name is "form" */ |
|
| 2225 | - case 'form': |
|
| 2226 | - /* If the stack of open elements has an element in scope |
|
| 2214 | + for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2215 | + if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2216 | + $n = -1; |
|
| 2217 | + } |
|
| 2218 | + |
|
| 2219 | + array_pop($this->stack); |
|
| 2220 | + } |
|
| 2221 | + } |
|
| 2222 | + break; |
|
| 2223 | + |
|
| 2224 | + /* An end tag whose tag name is "form" */ |
|
| 2225 | + case 'form': |
|
| 2226 | + /* If the stack of open elements has an element in scope |
|
| 2227 | 2227 | with the same tag name as that of the token, then generate |
| 2228 | 2228 | implied end tags. */ |
| 2229 | - if($this->elementInScope($token['name'])) { |
|
| 2230 | - $this->generateImpliedEndTags(); |
|
| 2229 | + if($this->elementInScope($token['name'])) { |
|
| 2230 | + $this->generateImpliedEndTags(); |
|
| 2231 | 2231 | |
| 2232 | - } |
|
| 2232 | + } |
|
| 2233 | 2233 | |
| 2234 | - if(end($this->stack)->nodeName !== $token['name']) { |
|
| 2235 | - /* Now, if the current node is not an element with the |
|
| 2234 | + if(end($this->stack)->nodeName !== $token['name']) { |
|
| 2235 | + /* Now, if the current node is not an element with the |
|
| 2236 | 2236 | same tag name as that of the token, then this is a parse |
| 2237 | 2237 | error. */ |
| 2238 | - // w/e |
|
| 2238 | + // w/e |
|
| 2239 | 2239 | |
| 2240 | - } else { |
|
| 2241 | - /* Otherwise, if the current node is an element with |
|
| 2240 | + } else { |
|
| 2241 | + /* Otherwise, if the current node is an element with |
|
| 2242 | 2242 | the same tag name as that of the token pop that element |
| 2243 | 2243 | from the stack. */ |
| 2244 | - array_pop($this->stack); |
|
| 2245 | - } |
|
| 2244 | + array_pop($this->stack); |
|
| 2245 | + } |
|
| 2246 | 2246 | |
| 2247 | - /* In any case, set the form element pointer to null. */ |
|
| 2248 | - $this->form_pointer = null; |
|
| 2249 | - break; |
|
| 2247 | + /* In any case, set the form element pointer to null. */ |
|
| 2248 | + $this->form_pointer = null; |
|
| 2249 | + break; |
|
| 2250 | 2250 | |
| 2251 | - /* An end tag whose tag name is "p" */ |
|
| 2252 | - case 'p': |
|
| 2253 | - /* If the stack of open elements has a p element in scope, |
|
| 2251 | + /* An end tag whose tag name is "p" */ |
|
| 2252 | + case 'p': |
|
| 2253 | + /* If the stack of open elements has a p element in scope, |
|
| 2254 | 2254 | then generate implied end tags, except for p elements. */ |
| 2255 | - if($this->elementInScope('p')) { |
|
| 2256 | - $this->generateImpliedEndTags(array('p')); |
|
| 2255 | + if($this->elementInScope('p')) { |
|
| 2256 | + $this->generateImpliedEndTags(array('p')); |
|
| 2257 | 2257 | |
| 2258 | - /* If the current node is not a p element, then this is |
|
| 2258 | + /* If the current node is not a p element, then this is |
|
| 2259 | 2259 | a parse error. */ |
| 2260 | - // k |
|
| 2260 | + // k |
|
| 2261 | 2261 | |
| 2262 | - /* If the stack of open elements has a p element in |
|
| 2262 | + /* If the stack of open elements has a p element in |
|
| 2263 | 2263 | scope, then pop elements from this stack until the stack |
| 2264 | 2264 | no longer has a p element in scope. */ |
| 2265 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2266 | - if($this->elementInScope('p')) { |
|
| 2267 | - array_pop($this->stack); |
|
| 2268 | - |
|
| 2269 | - } else { |
|
| 2270 | - break; |
|
| 2271 | - } |
|
| 2272 | - } |
|
| 2273 | - } |
|
| 2274 | - break; |
|
| 2275 | - |
|
| 2276 | - /* An end tag whose tag name is "dd", "dt", or "li" */ |
|
| 2277 | - case 'dd': case 'dt': case 'li': |
|
| 2278 | - /* If the stack of open elements has an element in scope |
|
| 2265 | + for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2266 | + if($this->elementInScope('p')) { |
|
| 2267 | + array_pop($this->stack); |
|
| 2268 | + |
|
| 2269 | + } else { |
|
| 2270 | + break; |
|
| 2271 | + } |
|
| 2272 | + } |
|
| 2273 | + } |
|
| 2274 | + break; |
|
| 2275 | + |
|
| 2276 | + /* An end tag whose tag name is "dd", "dt", or "li" */ |
|
| 2277 | + case 'dd': case 'dt': case 'li': |
|
| 2278 | + /* If the stack of open elements has an element in scope |
|
| 2279 | 2279 | whose tag name matches the tag name of the token, then |
| 2280 | 2280 | generate implied end tags, except for elements with the |
| 2281 | 2281 | same tag name as the token. */ |
| 2282 | - if($this->elementInScope($token['name'])) { |
|
| 2283 | - $this->generateImpliedEndTags(array($token['name'])); |
|
| 2282 | + if($this->elementInScope($token['name'])) { |
|
| 2283 | + $this->generateImpliedEndTags(array($token['name'])); |
|
| 2284 | 2284 | |
| 2285 | - /* If the current node is not an element with the same |
|
| 2285 | + /* If the current node is not an element with the same |
|
| 2286 | 2286 | tag name as the token, then this is a parse error. */ |
| 2287 | - // w/e |
|
| 2287 | + // w/e |
|
| 2288 | 2288 | |
| 2289 | - /* If the stack of open elements has an element in scope |
|
| 2289 | + /* If the stack of open elements has an element in scope |
|
| 2290 | 2290 | whose tag name matches the tag name of the token, then |
| 2291 | 2291 | pop elements from this stack until an element with that |
| 2292 | 2292 | tag name has been popped from the stack. */ |
| 2293 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2294 | - if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2295 | - $n = -1; |
|
| 2296 | - } |
|
| 2293 | + for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2294 | + if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2295 | + $n = -1; |
|
| 2296 | + } |
|
| 2297 | 2297 | |
| 2298 | - array_pop($this->stack); |
|
| 2299 | - } |
|
| 2300 | - } |
|
| 2301 | - break; |
|
| 2298 | + array_pop($this->stack); |
|
| 2299 | + } |
|
| 2300 | + } |
|
| 2301 | + break; |
|
| 2302 | 2302 | |
| 2303 | - /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", |
|
| 2303 | + /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", |
|
| 2304 | 2304 | "h5", "h6" */ |
| 2305 | - case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': |
|
| 2306 | - $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); |
|
| 2305 | + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': |
|
| 2306 | + $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); |
|
| 2307 | 2307 | |
| 2308 | - /* If the stack of open elements has in scope an element whose |
|
| 2308 | + /* If the stack of open elements has in scope an element whose |
|
| 2309 | 2309 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then |
| 2310 | 2310 | generate implied end tags. */ |
| 2311 | - if($this->elementInScope($elements)) { |
|
| 2312 | - $this->generateImpliedEndTags(); |
|
| 2311 | + if($this->elementInScope($elements)) { |
|
| 2312 | + $this->generateImpliedEndTags(); |
|
| 2313 | 2313 | |
| 2314 | - /* Now, if the current node is not an element with the same |
|
| 2314 | + /* Now, if the current node is not an element with the same |
|
| 2315 | 2315 | tag name as that of the token, then this is a parse error. */ |
| 2316 | - // w/e |
|
| 2316 | + // w/e |
|
| 2317 | 2317 | |
| 2318 | - /* If the stack of open elements has in scope an element |
|
| 2318 | + /* If the stack of open elements has in scope an element |
|
| 2319 | 2319 | whose tag name is one of "h1", "h2", "h3", "h4", "h5", or |
| 2320 | 2320 | "h6", then pop elements from the stack until an element |
| 2321 | 2321 | with one of those tag names has been popped from the stack. */ |
| 2322 | - while($this->elementInScope($elements)) { |
|
| 2323 | - array_pop($this->stack); |
|
| 2324 | - } |
|
| 2325 | - } |
|
| 2326 | - break; |
|
| 2322 | + while($this->elementInScope($elements)) { |
|
| 2323 | + array_pop($this->stack); |
|
| 2324 | + } |
|
| 2325 | + } |
|
| 2326 | + break; |
|
| 2327 | 2327 | |
| 2328 | - /* An end tag whose tag name is one of: "a", "b", "big", "em", |
|
| 2328 | + /* An end tag whose tag name is one of: "a", "b", "big", "em", |
|
| 2329 | 2329 | "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ |
| 2330 | - case 'a': case 'b': case 'big': case 'em': case 'font': |
|
| 2331 | - case 'i': case 'nobr': case 's': case 'small': case 'strike': |
|
| 2332 | - case 'strong': case 'tt': case 'u': |
|
| 2333 | - /* 1. Let the formatting element be the last element in |
|
| 2330 | + case 'a': case 'b': case 'big': case 'em': case 'font': |
|
| 2331 | + case 'i': case 'nobr': case 's': case 'small': case 'strike': |
|
| 2332 | + case 'strong': case 'tt': case 'u': |
|
| 2333 | + /* 1. Let the formatting element be the last element in |
|
| 2334 | 2334 | the list of active formatting elements that: |
| 2335 | 2335 | * is between the end of the list and the last scope |
| 2336 | 2336 | marker in the list, if any, or the start of the list |
| 2337 | 2337 | otherwise, and |
| 2338 | 2338 | * has the same tag name as the token. |
| 2339 | 2339 | */ |
| 2340 | - while(true) { |
|
| 2341 | - for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { |
|
| 2342 | - if($this->a_formatting[$a] === self::MARKER) { |
|
| 2343 | - break; |
|
| 2344 | - |
|
| 2345 | - } elseif($this->a_formatting[$a]->tagName === $token['name']) { |
|
| 2346 | - $formatting_element = $this->a_formatting[$a]; |
|
| 2347 | - $in_stack = in_array($formatting_element, $this->stack, true); |
|
| 2348 | - $fe_af_pos = $a; |
|
| 2349 | - break; |
|
| 2350 | - } |
|
| 2351 | - } |
|
| 2352 | - |
|
| 2353 | - /* If there is no such node, or, if that node is |
|
| 2340 | + while(true) { |
|
| 2341 | + for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { |
|
| 2342 | + if($this->a_formatting[$a] === self::MARKER) { |
|
| 2343 | + break; |
|
| 2344 | + |
|
| 2345 | + } elseif($this->a_formatting[$a]->tagName === $token['name']) { |
|
| 2346 | + $formatting_element = $this->a_formatting[$a]; |
|
| 2347 | + $in_stack = in_array($formatting_element, $this->stack, true); |
|
| 2348 | + $fe_af_pos = $a; |
|
| 2349 | + break; |
|
| 2350 | + } |
|
| 2351 | + } |
|
| 2352 | + |
|
| 2353 | + /* If there is no such node, or, if that node is |
|
| 2354 | 2354 | also in the stack of open elements but the element |
| 2355 | 2355 | is not in scope, then this is a parse error. Abort |
| 2356 | 2356 | these steps. The token is ignored. */ |
| 2357 | - if(!isset($formatting_element) || ($in_stack && |
|
| 2358 | - !$this->elementInScope($token['name']))) { |
|
| 2359 | - break; |
|
| 2357 | + if(!isset($formatting_element) || ($in_stack && |
|
| 2358 | + !$this->elementInScope($token['name']))) { |
|
| 2359 | + break; |
|
| 2360 | 2360 | |
| 2361 | - /* Otherwise, if there is such a node, but that node |
|
| 2361 | + /* Otherwise, if there is such a node, but that node |
|
| 2362 | 2362 | is not in the stack of open elements, then this is a |
| 2363 | 2363 | parse error; remove the element from the list, and |
| 2364 | 2364 | abort these steps. */ |
| 2365 | - } elseif(isset($formatting_element) && !$in_stack) { |
|
| 2366 | - unset($this->a_formatting[$fe_af_pos]); |
|
| 2367 | - $this->a_formatting = array_merge($this->a_formatting); |
|
| 2368 | - break; |
|
| 2369 | - } |
|
| 2365 | + } elseif(isset($formatting_element) && !$in_stack) { |
|
| 2366 | + unset($this->a_formatting[$fe_af_pos]); |
|
| 2367 | + $this->a_formatting = array_merge($this->a_formatting); |
|
| 2368 | + break; |
|
| 2369 | + } |
|
| 2370 | 2370 | |
| 2371 | - /* 2. Let the furthest block be the topmost node in the |
|
| 2371 | + /* 2. Let the furthest block be the topmost node in the |
|
| 2372 | 2372 | stack of open elements that is lower in the stack |
| 2373 | 2373 | than the formatting element, and is not an element in |
| 2374 | 2374 | the phrasing or formatting categories. There might |
| 2375 | 2375 | not be one. */ |
| 2376 | - $fe_s_pos = array_search($formatting_element, $this->stack, true); |
|
| 2377 | - $length = count($this->stack); |
|
| 2376 | + $fe_s_pos = array_search($formatting_element, $this->stack, true); |
|
| 2377 | + $length = count($this->stack); |
|
| 2378 | 2378 | |
| 2379 | - for($s = $fe_s_pos + 1; $s < $length; $s++) { |
|
| 2380 | - $category = $this->getElementCategory($this->stack[$s]->nodeName); |
|
| 2379 | + for($s = $fe_s_pos + 1; $s < $length; $s++) { |
|
| 2380 | + $category = $this->getElementCategory($this->stack[$s]->nodeName); |
|
| 2381 | 2381 | |
| 2382 | - if($category !== self::PHRASING && $category !== self::FORMATTING) { |
|
| 2383 | - $furthest_block = $this->stack[$s]; |
|
| 2384 | - } |
|
| 2385 | - } |
|
| 2382 | + if($category !== self::PHRASING && $category !== self::FORMATTING) { |
|
| 2383 | + $furthest_block = $this->stack[$s]; |
|
| 2384 | + } |
|
| 2385 | + } |
|
| 2386 | 2386 | |
| 2387 | - /* 3. If there is no furthest block, then the UA must |
|
| 2387 | + /* 3. If there is no furthest block, then the UA must |
|
| 2388 | 2388 | skip the subsequent steps and instead just pop all |
| 2389 | 2389 | the nodes from the bottom of the stack of open |
| 2390 | 2390 | elements, from the current node up to the formatting |
| 2391 | 2391 | element, and remove the formatting element from the |
| 2392 | 2392 | list of active formatting elements. */ |
| 2393 | - if(!isset($furthest_block)) { |
|
| 2394 | - for($n = $length - 1; $n >= $fe_s_pos; $n--) { |
|
| 2395 | - array_pop($this->stack); |
|
| 2396 | - } |
|
| 2393 | + if(!isset($furthest_block)) { |
|
| 2394 | + for($n = $length - 1; $n >= $fe_s_pos; $n--) { |
|
| 2395 | + array_pop($this->stack); |
|
| 2396 | + } |
|
| 2397 | 2397 | |
| 2398 | - unset($this->a_formatting[$fe_af_pos]); |
|
| 2399 | - $this->a_formatting = array_merge($this->a_formatting); |
|
| 2400 | - break; |
|
| 2401 | - } |
|
| 2398 | + unset($this->a_formatting[$fe_af_pos]); |
|
| 2399 | + $this->a_formatting = array_merge($this->a_formatting); |
|
| 2400 | + break; |
|
| 2401 | + } |
|
| 2402 | 2402 | |
| 2403 | - /* 4. Let the common ancestor be the element |
|
| 2403 | + /* 4. Let the common ancestor be the element |
|
| 2404 | 2404 | immediately above the formatting element in the stack |
| 2405 | 2405 | of open elements. */ |
| 2406 | - $common_ancestor = $this->stack[$fe_s_pos - 1]; |
|
| 2406 | + $common_ancestor = $this->stack[$fe_s_pos - 1]; |
|
| 2407 | 2407 | |
| 2408 | - /* 5. If the furthest block has a parent node, then |
|
| 2408 | + /* 5. If the furthest block has a parent node, then |
|
| 2409 | 2409 | remove the furthest block from its parent node. */ |
| 2410 | - if($furthest_block->parentNode !== null) { |
|
| 2411 | - $furthest_block->parentNode->removeChild($furthest_block); |
|
| 2412 | - } |
|
| 2410 | + if($furthest_block->parentNode !== null) { |
|
| 2411 | + $furthest_block->parentNode->removeChild($furthest_block); |
|
| 2412 | + } |
|
| 2413 | 2413 | |
| 2414 | - /* 6. Let a bookmark note the position of the |
|
| 2414 | + /* 6. Let a bookmark note the position of the |
|
| 2415 | 2415 | formatting element in the list of active formatting |
| 2416 | 2416 | elements relative to the elements on either side |
| 2417 | 2417 | of it in the list. */ |
| 2418 | - $bookmark = $fe_af_pos; |
|
| 2418 | + $bookmark = $fe_af_pos; |
|
| 2419 | 2419 | |
| 2420 | - /* 7. Let node and last node be the furthest block. |
|
| 2420 | + /* 7. Let node and last node be the furthest block. |
|
| 2421 | 2421 | Follow these steps: */ |
| 2422 | - $node = $furthest_block; |
|
| 2423 | - $last_node = $furthest_block; |
|
| 2422 | + $node = $furthest_block; |
|
| 2423 | + $last_node = $furthest_block; |
|
| 2424 | 2424 | |
| 2425 | - while(true) { |
|
| 2426 | - for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { |
|
| 2427 | - /* 7.1 Let node be the element immediately |
|
| 2425 | + while(true) { |
|
| 2426 | + for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { |
|
| 2427 | + /* 7.1 Let node be the element immediately |
|
| 2428 | 2428 | prior to node in the stack of open elements. */ |
| 2429 | - $node = $this->stack[$n]; |
|
| 2429 | + $node = $this->stack[$n]; |
|
| 2430 | 2430 | |
| 2431 | - /* 7.2 If node is not in the list of active |
|
| 2431 | + /* 7.2 If node is not in the list of active |
|
| 2432 | 2432 | formatting elements, then remove node from |
| 2433 | 2433 | the stack of open elements and then go back |
| 2434 | 2434 | to step 1. */ |
| 2435 | - if(!in_array($node, $this->a_formatting, true)) { |
|
| 2436 | - unset($this->stack[$n]); |
|
| 2437 | - $this->stack = array_merge($this->stack); |
|
| 2435 | + if(!in_array($node, $this->a_formatting, true)) { |
|
| 2436 | + unset($this->stack[$n]); |
|
| 2437 | + $this->stack = array_merge($this->stack); |
|
| 2438 | 2438 | |
| 2439 | - } else { |
|
| 2440 | - break; |
|
| 2441 | - } |
|
| 2442 | - } |
|
| 2439 | + } else { |
|
| 2440 | + break; |
|
| 2441 | + } |
|
| 2442 | + } |
|
| 2443 | 2443 | |
| 2444 | - /* 7.3 Otherwise, if node is the formatting |
|
| 2444 | + /* 7.3 Otherwise, if node is the formatting |
|
| 2445 | 2445 | element, then go to the next step in the overall |
| 2446 | 2446 | algorithm. */ |
| 2447 | - if($node === $formatting_element) { |
|
| 2448 | - break; |
|
| 2447 | + if($node === $formatting_element) { |
|
| 2448 | + break; |
|
| 2449 | 2449 | |
| 2450 | - /* 7.4 Otherwise, if last node is the furthest |
|
| 2450 | + /* 7.4 Otherwise, if last node is the furthest |
|
| 2451 | 2451 | block, then move the aforementioned bookmark to |
| 2452 | 2452 | be immediately after the node in the list of |
| 2453 | 2453 | active formatting elements. */ |
| 2454 | - } elseif($last_node === $furthest_block) { |
|
| 2455 | - $bookmark = array_search($node, $this->a_formatting, true) + 1; |
|
| 2456 | - } |
|
| 2454 | + } elseif($last_node === $furthest_block) { |
|
| 2455 | + $bookmark = array_search($node, $this->a_formatting, true) + 1; |
|
| 2456 | + } |
|
| 2457 | 2457 | |
| 2458 | - /* 7.5 If node has any children, perform a |
|
| 2458 | + /* 7.5 If node has any children, perform a |
|
| 2459 | 2459 | shallow clone of node, replace the entry for |
| 2460 | 2460 | node in the list of active formatting elements |
| 2461 | 2461 | with an entry for the clone, replace the entry |
| 2462 | 2462 | for node in the stack of open elements with an |
| 2463 | 2463 | entry for the clone, and let node be the clone. */ |
| 2464 | - if($node->hasChildNodes()) { |
|
| 2465 | - $clone = $node->cloneNode(); |
|
| 2466 | - $s_pos = array_search($node, $this->stack, true); |
|
| 2467 | - $a_pos = array_search($node, $this->a_formatting, true); |
|
| 2464 | + if($node->hasChildNodes()) { |
|
| 2465 | + $clone = $node->cloneNode(); |
|
| 2466 | + $s_pos = array_search($node, $this->stack, true); |
|
| 2467 | + $a_pos = array_search($node, $this->a_formatting, true); |
|
| 2468 | 2468 | |
| 2469 | - $this->stack[$s_pos] = $clone; |
|
| 2470 | - $this->a_formatting[$a_pos] = $clone; |
|
| 2471 | - $node = $clone; |
|
| 2472 | - } |
|
| 2469 | + $this->stack[$s_pos] = $clone; |
|
| 2470 | + $this->a_formatting[$a_pos] = $clone; |
|
| 2471 | + $node = $clone; |
|
| 2472 | + } |
|
| 2473 | 2473 | |
| 2474 | - /* 7.6 Insert last node into node, first removing |
|
| 2474 | + /* 7.6 Insert last node into node, first removing |
|
| 2475 | 2475 | it from its previous parent node if any. */ |
| 2476 | - if($last_node->parentNode !== null) { |
|
| 2477 | - $last_node->parentNode->removeChild($last_node); |
|
| 2478 | - } |
|
| 2476 | + if($last_node->parentNode !== null) { |
|
| 2477 | + $last_node->parentNode->removeChild($last_node); |
|
| 2478 | + } |
|
| 2479 | 2479 | |
| 2480 | - $node->appendChild($last_node); |
|
| 2480 | + $node->appendChild($last_node); |
|
| 2481 | 2481 | |
| 2482 | - /* 7.7 Let last node be node. */ |
|
| 2483 | - $last_node = $node; |
|
| 2484 | - } |
|
| 2482 | + /* 7.7 Let last node be node. */ |
|
| 2483 | + $last_node = $node; |
|
| 2484 | + } |
|
| 2485 | 2485 | |
| 2486 | - /* 8. Insert whatever last node ended up being in |
|
| 2486 | + /* 8. Insert whatever last node ended up being in |
|
| 2487 | 2487 | the previous step into the common ancestor node, |
| 2488 | 2488 | first removing it from its previous parent node if |
| 2489 | 2489 | any. */ |
| 2490 | - if($last_node->parentNode !== null) { |
|
| 2491 | - $last_node->parentNode->removeChild($last_node); |
|
| 2492 | - } |
|
| 2490 | + if($last_node->parentNode !== null) { |
|
| 2491 | + $last_node->parentNode->removeChild($last_node); |
|
| 2492 | + } |
|
| 2493 | 2493 | |
| 2494 | - $common_ancestor->appendChild($last_node); |
|
| 2494 | + $common_ancestor->appendChild($last_node); |
|
| 2495 | 2495 | |
| 2496 | - /* 9. Perform a shallow clone of the formatting |
|
| 2496 | + /* 9. Perform a shallow clone of the formatting |
|
| 2497 | 2497 | element. */ |
| 2498 | - $clone = $formatting_element->cloneNode(); |
|
| 2498 | + $clone = $formatting_element->cloneNode(); |
|
| 2499 | 2499 | |
| 2500 | - /* 10. Take all of the child nodes of the furthest |
|
| 2500 | + /* 10. Take all of the child nodes of the furthest |
|
| 2501 | 2501 | block and append them to the clone created in the |
| 2502 | 2502 | last step. */ |
| 2503 | - while($furthest_block->hasChildNodes()) { |
|
| 2504 | - $child = $furthest_block->firstChild; |
|
| 2505 | - $furthest_block->removeChild($child); |
|
| 2506 | - $clone->appendChild($child); |
|
| 2507 | - } |
|
| 2503 | + while($furthest_block->hasChildNodes()) { |
|
| 2504 | + $child = $furthest_block->firstChild; |
|
| 2505 | + $furthest_block->removeChild($child); |
|
| 2506 | + $clone->appendChild($child); |
|
| 2507 | + } |
|
| 2508 | 2508 | |
| 2509 | - /* 11. Append that clone to the furthest block. */ |
|
| 2510 | - $furthest_block->appendChild($clone); |
|
| 2509 | + /* 11. Append that clone to the furthest block. */ |
|
| 2510 | + $furthest_block->appendChild($clone); |
|
| 2511 | 2511 | |
| 2512 | - /* 12. Remove the formatting element from the list |
|
| 2512 | + /* 12. Remove the formatting element from the list |
|
| 2513 | 2513 | of active formatting elements, and insert the clone |
| 2514 | 2514 | into the list of active formatting elements at the |
| 2515 | 2515 | position of the aforementioned bookmark. */ |
| 2516 | - $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); |
|
| 2517 | - unset($this->a_formatting[$fe_af_pos]); |
|
| 2518 | - $this->a_formatting = array_merge($this->a_formatting); |
|
| 2516 | + $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); |
|
| 2517 | + unset($this->a_formatting[$fe_af_pos]); |
|
| 2518 | + $this->a_formatting = array_merge($this->a_formatting); |
|
| 2519 | 2519 | |
| 2520 | - $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); |
|
| 2521 | - $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); |
|
| 2522 | - $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); |
|
| 2520 | + $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); |
|
| 2521 | + $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); |
|
| 2522 | + $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); |
|
| 2523 | 2523 | |
| 2524 | - /* 13. Remove the formatting element from the stack |
|
| 2524 | + /* 13. Remove the formatting element from the stack |
|
| 2525 | 2525 | of open elements, and insert the clone into the stack |
| 2526 | 2526 | of open elements immediately after (i.e. in a more |
| 2527 | 2527 | deeply nested position than) the position of the |
| 2528 | 2528 | furthest block in that stack. */ |
| 2529 | - $fe_s_pos = array_search($formatting_element, $this->stack, true); |
|
| 2530 | - $fb_s_pos = array_search($furthest_block, $this->stack, true); |
|
| 2531 | - unset($this->stack[$fe_s_pos]); |
|
| 2529 | + $fe_s_pos = array_search($formatting_element, $this->stack, true); |
|
| 2530 | + $fb_s_pos = array_search($furthest_block, $this->stack, true); |
|
| 2531 | + unset($this->stack[$fe_s_pos]); |
|
| 2532 | 2532 | |
| 2533 | - $s_part1 = array_slice($this->stack, 0, $fb_s_pos); |
|
| 2534 | - $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); |
|
| 2535 | - $this->stack = array_merge($s_part1, array($clone), $s_part2); |
|
| 2533 | + $s_part1 = array_slice($this->stack, 0, $fb_s_pos); |
|
| 2534 | + $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); |
|
| 2535 | + $this->stack = array_merge($s_part1, array($clone), $s_part2); |
|
| 2536 | 2536 | |
| 2537 | - /* 14. Jump back to step 1 in this series of steps. */ |
|
| 2538 | - unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); |
|
| 2539 | - } |
|
| 2540 | - break; |
|
| 2537 | + /* 14. Jump back to step 1 in this series of steps. */ |
|
| 2538 | + unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); |
|
| 2539 | + } |
|
| 2540 | + break; |
|
| 2541 | 2541 | |
| 2542 | - /* An end tag token whose tag name is one of: "button", |
|
| 2542 | + /* An end tag token whose tag name is one of: "button", |
|
| 2543 | 2543 | "marquee", "object" */ |
| 2544 | - case 'button': case 'marquee': case 'object': |
|
| 2545 | - /* If the stack of open elements has an element in scope whose |
|
| 2544 | + case 'button': case 'marquee': case 'object': |
|
| 2545 | + /* If the stack of open elements has an element in scope whose |
|
| 2546 | 2546 | tag name matches the tag name of the token, then generate implied |
| 2547 | 2547 | tags. */ |
| 2548 | - if($this->elementInScope($token['name'])) { |
|
| 2549 | - $this->generateImpliedEndTags(); |
|
| 2548 | + if($this->elementInScope($token['name'])) { |
|
| 2549 | + $this->generateImpliedEndTags(); |
|
| 2550 | 2550 | |
| 2551 | - /* Now, if the current node is not an element with the same |
|
| 2551 | + /* Now, if the current node is not an element with the same |
|
| 2552 | 2552 | tag name as the token, then this is a parse error. */ |
| 2553 | - // k |
|
| 2553 | + // k |
|
| 2554 | 2554 | |
| 2555 | - /* Now, if the stack of open elements has an element in scope |
|
| 2555 | + /* Now, if the stack of open elements has an element in scope |
|
| 2556 | 2556 | whose tag name matches the tag name of the token, then pop |
| 2557 | 2557 | elements from the stack until that element has been popped from |
| 2558 | 2558 | the stack, and clear the list of active formatting elements up |
| 2559 | 2559 | to the last marker. */ |
| 2560 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2561 | - if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2562 | - $n = -1; |
|
| 2563 | - } |
|
| 2560 | + for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2561 | + if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2562 | + $n = -1; |
|
| 2563 | + } |
|
| 2564 | 2564 | |
| 2565 | - array_pop($this->stack); |
|
| 2566 | - } |
|
| 2565 | + array_pop($this->stack); |
|
| 2566 | + } |
|
| 2567 | 2567 | |
| 2568 | - $marker = end(array_keys($this->a_formatting, self::MARKER, true)); |
|
| 2568 | + $marker = end(array_keys($this->a_formatting, self::MARKER, true)); |
|
| 2569 | 2569 | |
| 2570 | - for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { |
|
| 2571 | - array_pop($this->a_formatting); |
|
| 2572 | - } |
|
| 2573 | - } |
|
| 2574 | - break; |
|
| 2570 | + for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { |
|
| 2571 | + array_pop($this->a_formatting); |
|
| 2572 | + } |
|
| 2573 | + } |
|
| 2574 | + break; |
|
| 2575 | 2575 | |
| 2576 | - /* Or an end tag whose tag name is one of: "area", "basefont", |
|
| 2576 | + /* Or an end tag whose tag name is one of: "area", "basefont", |
|
| 2577 | 2577 | "bgsound", "br", "embed", "hr", "iframe", "image", "img", |
| 2578 | 2578 | "input", "isindex", "noembed", "noframes", "param", "select", |
| 2579 | 2579 | "spacer", "table", "textarea", "wbr" */ |
| 2580 | - case 'area': case 'basefont': case 'bgsound': case 'br': |
|
| 2581 | - case 'embed': case 'hr': case 'iframe': case 'image': |
|
| 2582 | - case 'img': case 'input': case 'isindex': case 'noembed': |
|
| 2583 | - case 'noframes': case 'param': case 'select': case 'spacer': |
|
| 2584 | - case 'table': case 'textarea': case 'wbr': |
|
| 2585 | - // Parse error. Ignore the token. |
|
| 2586 | - break; |
|
| 2587 | - |
|
| 2588 | - /* An end tag token not covered by the previous entries */ |
|
| 2589 | - default: |
|
| 2590 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2591 | - /* Initialise node to be the current node (the bottommost |
|
| 2580 | + case 'area': case 'basefont': case 'bgsound': case 'br': |
|
| 2581 | + case 'embed': case 'hr': case 'iframe': case 'image': |
|
| 2582 | + case 'img': case 'input': case 'isindex': case 'noembed': |
|
| 2583 | + case 'noframes': case 'param': case 'select': case 'spacer': |
|
| 2584 | + case 'table': case 'textarea': case 'wbr': |
|
| 2585 | + // Parse error. Ignore the token. |
|
| 2586 | + break; |
|
| 2587 | + |
|
| 2588 | + /* An end tag token not covered by the previous entries */ |
|
| 2589 | + default: |
|
| 2590 | + for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2591 | + /* Initialise node to be the current node (the bottommost |
|
| 2592 | 2592 | node of the stack). */ |
| 2593 | - $node = end($this->stack); |
|
| 2593 | + $node = end($this->stack); |
|
| 2594 | 2594 | |
| 2595 | - /* If node has the same tag name as the end tag token, |
|
| 2595 | + /* If node has the same tag name as the end tag token, |
|
| 2596 | 2596 | then: */ |
| 2597 | - if($token['name'] === $node->nodeName) { |
|
| 2598 | - /* Generate implied end tags. */ |
|
| 2599 | - $this->generateImpliedEndTags(); |
|
| 2597 | + if($token['name'] === $node->nodeName) { |
|
| 2598 | + /* Generate implied end tags. */ |
|
| 2599 | + $this->generateImpliedEndTags(); |
|
| 2600 | 2600 | |
| 2601 | - /* If the tag name of the end tag token does not |
|
| 2601 | + /* If the tag name of the end tag token does not |
|
| 2602 | 2602 | match the tag name of the current node, this is a |
| 2603 | 2603 | parse error. */ |
| 2604 | - // k |
|
| 2604 | + // k |
|
| 2605 | 2605 | |
| 2606 | - /* Pop all the nodes from the current node up to |
|
| 2606 | + /* Pop all the nodes from the current node up to |
|
| 2607 | 2607 | node, including node, then stop this algorithm. */ |
| 2608 | - for($x = count($this->stack) - $n; $x >= $n; $x--) { |
|
| 2609 | - array_pop($this->stack); |
|
| 2610 | - } |
|
| 2608 | + for($x = count($this->stack) - $n; $x >= $n; $x--) { |
|
| 2609 | + array_pop($this->stack); |
|
| 2610 | + } |
|
| 2611 | 2611 | |
| 2612 | - } else { |
|
| 2613 | - $category = $this->getElementCategory($node); |
|
| 2612 | + } else { |
|
| 2613 | + $category = $this->getElementCategory($node); |
|
| 2614 | 2614 | |
| 2615 | - if($category !== self::SPECIAL && $category !== self::SCOPING) { |
|
| 2616 | - /* Otherwise, if node is in neither the formatting |
|
| 2615 | + if($category !== self::SPECIAL && $category !== self::SCOPING) { |
|
| 2616 | + /* Otherwise, if node is in neither the formatting |
|
| 2617 | 2617 | category nor the phrasing category, then this is a |
| 2618 | 2618 | parse error. Stop this algorithm. The end tag token |
| 2619 | 2619 | is ignored. */ |
| 2620 | - return false; |
|
| 2621 | - } |
|
| 2622 | - } |
|
| 2623 | - } |
|
| 2624 | - break; |
|
| 2625 | - } |
|
| 2626 | - break; |
|
| 2627 | - } |
|
| 2628 | - } |
|
| 2629 | - |
|
| 2630 | - private function inTable($token) { |
|
| 2631 | - $clear = array('html', 'table'); |
|
| 2632 | - |
|
| 2633 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 2620 | + return false; |
|
| 2621 | + } |
|
| 2622 | + } |
|
| 2623 | + } |
|
| 2624 | + break; |
|
| 2625 | + } |
|
| 2626 | + break; |
|
| 2627 | + } |
|
| 2628 | + } |
|
| 2629 | + |
|
| 2630 | + private function inTable($token) { |
|
| 2631 | + $clear = array('html', 'table'); |
|
| 2632 | + |
|
| 2633 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 2634 | 2634 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 2635 | 2635 | or U+0020 SPACE */ |
| 2636 | - if($token['type'] === HTML5::CHARACTR && |
|
| 2637 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 2638 | - /* Append the character to the current node. */ |
|
| 2639 | - $text = $this->dom->createTextNode($token['data']); |
|
| 2640 | - end($this->stack)->appendChild($text); |
|
| 2641 | - |
|
| 2642 | - /* A comment token */ |
|
| 2643 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 2644 | - /* Append a Comment node to the current node with the data |
|
| 2636 | + if($token['type'] === HTML5::CHARACTR && |
|
| 2637 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 2638 | + /* Append the character to the current node. */ |
|
| 2639 | + $text = $this->dom->createTextNode($token['data']); |
|
| 2640 | + end($this->stack)->appendChild($text); |
|
| 2641 | + |
|
| 2642 | + /* A comment token */ |
|
| 2643 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 2644 | + /* Append a Comment node to the current node with the data |
|
| 2645 | 2645 | attribute set to the data given in the comment token. */ |
| 2646 | - $comment = $this->dom->createComment($token['data']); |
|
| 2647 | - end($this->stack)->appendChild($comment); |
|
| 2646 | + $comment = $this->dom->createComment($token['data']); |
|
| 2647 | + end($this->stack)->appendChild($comment); |
|
| 2648 | 2648 | |
| 2649 | - /* A start tag whose tag name is "caption" */ |
|
| 2650 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2651 | - $token['name'] === 'caption') { |
|
| 2652 | - /* Clear the stack back to a table context. */ |
|
| 2653 | - $this->clearStackToTableContext($clear); |
|
| 2649 | + /* A start tag whose tag name is "caption" */ |
|
| 2650 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2651 | + $token['name'] === 'caption') { |
|
| 2652 | + /* Clear the stack back to a table context. */ |
|
| 2653 | + $this->clearStackToTableContext($clear); |
|
| 2654 | 2654 | |
| 2655 | - /* Insert a marker at the end of the list of active |
|
| 2655 | + /* Insert a marker at the end of the list of active |
|
| 2656 | 2656 | formatting elements. */ |
| 2657 | - $this->a_formatting[] = self::MARKER; |
|
| 2657 | + $this->a_formatting[] = self::MARKER; |
|
| 2658 | 2658 | |
| 2659 | - /* Insert an HTML element for the token, then switch the |
|
| 2659 | + /* Insert an HTML element for the token, then switch the |
|
| 2660 | 2660 | insertion mode to "in caption". */ |
| 2661 | - $this->insertElement($token); |
|
| 2662 | - $this->mode = self::IN_CAPTION; |
|
| 2661 | + $this->insertElement($token); |
|
| 2662 | + $this->mode = self::IN_CAPTION; |
|
| 2663 | 2663 | |
| 2664 | - /* A start tag whose tag name is "colgroup" */ |
|
| 2665 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2666 | - $token['name'] === 'colgroup') { |
|
| 2667 | - /* Clear the stack back to a table context. */ |
|
| 2668 | - $this->clearStackToTableContext($clear); |
|
| 2664 | + /* A start tag whose tag name is "colgroup" */ |
|
| 2665 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2666 | + $token['name'] === 'colgroup') { |
|
| 2667 | + /* Clear the stack back to a table context. */ |
|
| 2668 | + $this->clearStackToTableContext($clear); |
|
| 2669 | 2669 | |
| 2670 | - /* Insert an HTML element for the token, then switch the |
|
| 2670 | + /* Insert an HTML element for the token, then switch the |
|
| 2671 | 2671 | insertion mode to "in column group". */ |
| 2672 | - $this->insertElement($token); |
|
| 2673 | - $this->mode = self::IN_CGROUP; |
|
| 2674 | - |
|
| 2675 | - /* A start tag whose tag name is "col" */ |
|
| 2676 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2677 | - $token['name'] === 'col') { |
|
| 2678 | - $this->inTable(array( |
|
| 2679 | - 'name' => 'colgroup', |
|
| 2680 | - 'type' => HTML5::STARTTAG, |
|
| 2681 | - 'attr' => array() |
|
| 2682 | - )); |
|
| 2683 | - |
|
| 2684 | - $this->inColumnGroup($token); |
|
| 2685 | - |
|
| 2686 | - /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
|
| 2687 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2688 | - array('tbody', 'tfoot', 'thead'))) { |
|
| 2689 | - /* Clear the stack back to a table context. */ |
|
| 2690 | - $this->clearStackToTableContext($clear); |
|
| 2691 | - |
|
| 2692 | - /* Insert an HTML element for the token, then switch the insertion |
|
| 2672 | + $this->insertElement($token); |
|
| 2673 | + $this->mode = self::IN_CGROUP; |
|
| 2674 | + |
|
| 2675 | + /* A start tag whose tag name is "col" */ |
|
| 2676 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2677 | + $token['name'] === 'col') { |
|
| 2678 | + $this->inTable(array( |
|
| 2679 | + 'name' => 'colgroup', |
|
| 2680 | + 'type' => HTML5::STARTTAG, |
|
| 2681 | + 'attr' => array() |
|
| 2682 | + )); |
|
| 2683 | + |
|
| 2684 | + $this->inColumnGroup($token); |
|
| 2685 | + |
|
| 2686 | + /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
|
| 2687 | + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2688 | + array('tbody', 'tfoot', 'thead'))) { |
|
| 2689 | + /* Clear the stack back to a table context. */ |
|
| 2690 | + $this->clearStackToTableContext($clear); |
|
| 2691 | + |
|
| 2692 | + /* Insert an HTML element for the token, then switch the insertion |
|
| 2693 | 2693 | mode to "in table body". */ |
| 2694 | - $this->insertElement($token); |
|
| 2695 | - $this->mode = self::IN_TBODY; |
|
| 2694 | + $this->insertElement($token); |
|
| 2695 | + $this->mode = self::IN_TBODY; |
|
| 2696 | 2696 | |
| 2697 | - /* A start tag whose tag name is one of: "td", "th", "tr" */ |
|
| 2698 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2699 | - in_array($token['name'], array('td', 'th', 'tr'))) { |
|
| 2700 | - /* Act as if a start tag token with the tag name "tbody" had been |
|
| 2697 | + /* A start tag whose tag name is one of: "td", "th", "tr" */ |
|
| 2698 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2699 | + in_array($token['name'], array('td', 'th', 'tr'))) { |
|
| 2700 | + /* Act as if a start tag token with the tag name "tbody" had been |
|
| 2701 | 2701 | seen, then reprocess the current token. */ |
| 2702 | - $this->inTable(array( |
|
| 2703 | - 'name' => 'tbody', |
|
| 2704 | - 'type' => HTML5::STARTTAG, |
|
| 2705 | - 'attr' => array() |
|
| 2706 | - )); |
|
| 2707 | - |
|
| 2708 | - return $this->inTableBody($token); |
|
| 2709 | - |
|
| 2710 | - /* A start tag whose tag name is "table" */ |
|
| 2711 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2712 | - $token['name'] === 'table') { |
|
| 2713 | - /* Parse error. Act as if an end tag token with the tag name "table" |
|
| 2702 | + $this->inTable(array( |
|
| 2703 | + 'name' => 'tbody', |
|
| 2704 | + 'type' => HTML5::STARTTAG, |
|
| 2705 | + 'attr' => array() |
|
| 2706 | + )); |
|
| 2707 | + |
|
| 2708 | + return $this->inTableBody($token); |
|
| 2709 | + |
|
| 2710 | + /* A start tag whose tag name is "table" */ |
|
| 2711 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2712 | + $token['name'] === 'table') { |
|
| 2713 | + /* Parse error. Act as if an end tag token with the tag name "table" |
|
| 2714 | 2714 | had been seen, then, if that token wasn't ignored, reprocess the |
| 2715 | 2715 | current token. */ |
| 2716 | - $this->inTable(array( |
|
| 2717 | - 'name' => 'table', |
|
| 2718 | - 'type' => HTML5::ENDTAG |
|
| 2719 | - )); |
|
| 2716 | + $this->inTable(array( |
|
| 2717 | + 'name' => 'table', |
|
| 2718 | + 'type' => HTML5::ENDTAG |
|
| 2719 | + )); |
|
| 2720 | 2720 | |
| 2721 | - return $this->mainPhase($token); |
|
| 2721 | + return $this->mainPhase($token); |
|
| 2722 | 2722 | |
| 2723 | - /* An end tag whose tag name is "table" */ |
|
| 2724 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2725 | - $token['name'] === 'table') { |
|
| 2726 | - /* If the stack of open elements does not have an element in table |
|
| 2723 | + /* An end tag whose tag name is "table" */ |
|
| 2724 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2725 | + $token['name'] === 'table') { |
|
| 2726 | + /* If the stack of open elements does not have an element in table |
|
| 2727 | 2727 | scope with the same tag name as the token, this is a parse error. |
| 2728 | 2728 | Ignore the token. (innerHTML case) */ |
| 2729 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 2730 | - return false; |
|
| 2729 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 2730 | + return false; |
|
| 2731 | 2731 | |
| 2732 | - /* Otherwise: */ |
|
| 2733 | - } else { |
|
| 2734 | - /* Generate implied end tags. */ |
|
| 2735 | - $this->generateImpliedEndTags(); |
|
| 2732 | + /* Otherwise: */ |
|
| 2733 | + } else { |
|
| 2734 | + /* Generate implied end tags. */ |
|
| 2735 | + $this->generateImpliedEndTags(); |
|
| 2736 | 2736 | |
| 2737 | - /* Now, if the current node is not a table element, then this |
|
| 2737 | + /* Now, if the current node is not a table element, then this |
|
| 2738 | 2738 | is a parse error. */ |
| 2739 | - // w/e |
|
| 2739 | + // w/e |
|
| 2740 | 2740 | |
| 2741 | - /* Pop elements from this stack until a table element has been |
|
| 2741 | + /* Pop elements from this stack until a table element has been |
|
| 2742 | 2742 | popped from the stack. */ |
| 2743 | - while(true) { |
|
| 2744 | - $current = end($this->stack)->nodeName; |
|
| 2745 | - array_pop($this->stack); |
|
| 2743 | + while(true) { |
|
| 2744 | + $current = end($this->stack)->nodeName; |
|
| 2745 | + array_pop($this->stack); |
|
| 2746 | 2746 | |
| 2747 | - if($current === 'table') { |
|
| 2748 | - break; |
|
| 2749 | - } |
|
| 2750 | - } |
|
| 2747 | + if($current === 'table') { |
|
| 2748 | + break; |
|
| 2749 | + } |
|
| 2750 | + } |
|
| 2751 | 2751 | |
| 2752 | - /* Reset the insertion mode appropriately. */ |
|
| 2753 | - $this->resetInsertionMode(); |
|
| 2754 | - } |
|
| 2752 | + /* Reset the insertion mode appropriately. */ |
|
| 2753 | + $this->resetInsertionMode(); |
|
| 2754 | + } |
|
| 2755 | 2755 | |
| 2756 | - /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 2756 | + /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 2757 | 2757 | "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 2758 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2759 | - array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', |
|
| 2760 | - 'tfoot', 'th', 'thead', 'tr'))) { |
|
| 2761 | - // Parse error. Ignore the token. |
|
| 2762 | - |
|
| 2763 | - /* Anything else */ |
|
| 2764 | - } else { |
|
| 2765 | - /* Parse error. Process the token as if the insertion mode was "in |
|
| 2758 | + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2759 | + array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', |
|
| 2760 | + 'tfoot', 'th', 'thead', 'tr'))) { |
|
| 2761 | + // Parse error. Ignore the token. |
|
| 2762 | + |
|
| 2763 | + /* Anything else */ |
|
| 2764 | + } else { |
|
| 2765 | + /* Parse error. Process the token as if the insertion mode was "in |
|
| 2766 | 2766 | body", with the following exception: */ |
| 2767 | 2767 | |
| 2768 | - /* If the current node is a table, tbody, tfoot, thead, or tr |
|
| 2768 | + /* If the current node is a table, tbody, tfoot, thead, or tr |
|
| 2769 | 2769 | element, then, whenever a node would be inserted into the current |
| 2770 | 2770 | node, it must instead be inserted into the foster parent element. */ |
| 2771 | - if(in_array(end($this->stack)->nodeName, |
|
| 2772 | - array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { |
|
| 2773 | - /* The foster parent element is the parent element of the last |
|
| 2771 | + if(in_array(end($this->stack)->nodeName, |
|
| 2772 | + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { |
|
| 2773 | + /* The foster parent element is the parent element of the last |
|
| 2774 | 2774 | table element in the stack of open elements, if there is a |
| 2775 | 2775 | table element and it has such a parent element. If there is no |
| 2776 | 2776 | table element in the stack of open elements (innerHTML case), |
@@ -2781,1124 +2781,1124 @@ discard block |
||
| 2781 | 2781 | its parent node is not an element, then the foster parent |
| 2782 | 2782 | element is the element before the last table element in the |
| 2783 | 2783 | stack of open elements. */ |
| 2784 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2785 | - if($this->stack[$n]->nodeName === 'table') { |
|
| 2786 | - $table = $this->stack[$n]; |
|
| 2787 | - break; |
|
| 2788 | - } |
|
| 2789 | - } |
|
| 2790 | - |
|
| 2791 | - if(isset($table) && $table->parentNode !== null) { |
|
| 2792 | - $this->foster_parent = $table->parentNode; |
|
| 2793 | - |
|
| 2794 | - } elseif(!isset($table)) { |
|
| 2795 | - $this->foster_parent = $this->stack[0]; |
|
| 2796 | - |
|
| 2797 | - } elseif(isset($table) && ($table->parentNode === null || |
|
| 2798 | - $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { |
|
| 2799 | - $this->foster_parent = $this->stack[$n - 1]; |
|
| 2800 | - } |
|
| 2801 | - } |
|
| 2802 | - |
|
| 2803 | - $this->inBody($token); |
|
| 2804 | - } |
|
| 2805 | - } |
|
| 2806 | - |
|
| 2807 | - private function inCaption($token) { |
|
| 2808 | - /* An end tag whose tag name is "caption" */ |
|
| 2809 | - if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { |
|
| 2810 | - /* If the stack of open elements does not have an element in table |
|
| 2784 | + for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2785 | + if($this->stack[$n]->nodeName === 'table') { |
|
| 2786 | + $table = $this->stack[$n]; |
|
| 2787 | + break; |
|
| 2788 | + } |
|
| 2789 | + } |
|
| 2790 | + |
|
| 2791 | + if(isset($table) && $table->parentNode !== null) { |
|
| 2792 | + $this->foster_parent = $table->parentNode; |
|
| 2793 | + |
|
| 2794 | + } elseif(!isset($table)) { |
|
| 2795 | + $this->foster_parent = $this->stack[0]; |
|
| 2796 | + |
|
| 2797 | + } elseif(isset($table) && ($table->parentNode === null || |
|
| 2798 | + $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { |
|
| 2799 | + $this->foster_parent = $this->stack[$n - 1]; |
|
| 2800 | + } |
|
| 2801 | + } |
|
| 2802 | + |
|
| 2803 | + $this->inBody($token); |
|
| 2804 | + } |
|
| 2805 | + } |
|
| 2806 | + |
|
| 2807 | + private function inCaption($token) { |
|
| 2808 | + /* An end tag whose tag name is "caption" */ |
|
| 2809 | + if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { |
|
| 2810 | + /* If the stack of open elements does not have an element in table |
|
| 2811 | 2811 | scope with the same tag name as the token, this is a parse error. |
| 2812 | 2812 | Ignore the token. (innerHTML case) */ |
| 2813 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 2814 | - // Ignore |
|
| 2813 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 2814 | + // Ignore |
|
| 2815 | 2815 | |
| 2816 | - /* Otherwise: */ |
|
| 2817 | - } else { |
|
| 2818 | - /* Generate implied end tags. */ |
|
| 2819 | - $this->generateImpliedEndTags(); |
|
| 2816 | + /* Otherwise: */ |
|
| 2817 | + } else { |
|
| 2818 | + /* Generate implied end tags. */ |
|
| 2819 | + $this->generateImpliedEndTags(); |
|
| 2820 | 2820 | |
| 2821 | - /* Now, if the current node is not a caption element, then this |
|
| 2821 | + /* Now, if the current node is not a caption element, then this |
|
| 2822 | 2822 | is a parse error. */ |
| 2823 | - // w/e |
|
| 2823 | + // w/e |
|
| 2824 | 2824 | |
| 2825 | - /* Pop elements from this stack until a caption element has |
|
| 2825 | + /* Pop elements from this stack until a caption element has |
|
| 2826 | 2826 | been popped from the stack. */ |
| 2827 | - while(true) { |
|
| 2828 | - $node = end($this->stack)->nodeName; |
|
| 2829 | - array_pop($this->stack); |
|
| 2827 | + while(true) { |
|
| 2828 | + $node = end($this->stack)->nodeName; |
|
| 2829 | + array_pop($this->stack); |
|
| 2830 | 2830 | |
| 2831 | - if($node === 'caption') { |
|
| 2832 | - break; |
|
| 2833 | - } |
|
| 2834 | - } |
|
| 2831 | + if($node === 'caption') { |
|
| 2832 | + break; |
|
| 2833 | + } |
|
| 2834 | + } |
|
| 2835 | 2835 | |
| 2836 | - /* Clear the list of active formatting elements up to the last |
|
| 2836 | + /* Clear the list of active formatting elements up to the last |
|
| 2837 | 2837 | marker. */ |
| 2838 | - $this->clearTheActiveFormattingElementsUpToTheLastMarker(); |
|
| 2838 | + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); |
|
| 2839 | 2839 | |
| 2840 | - /* Switch the insertion mode to "in table". */ |
|
| 2841 | - $this->mode = self::IN_TABLE; |
|
| 2842 | - } |
|
| 2840 | + /* Switch the insertion mode to "in table". */ |
|
| 2841 | + $this->mode = self::IN_TABLE; |
|
| 2842 | + } |
|
| 2843 | 2843 | |
| 2844 | - /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 2844 | + /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 2845 | 2845 | "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag |
| 2846 | 2846 | name is "table" */ |
| 2847 | - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2848 | - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
|
| 2849 | - 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && |
|
| 2850 | - $token['name'] === 'table')) { |
|
| 2851 | - /* Parse error. Act as if an end tag with the tag name "caption" |
|
| 2847 | + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2848 | + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
|
| 2849 | + 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && |
|
| 2850 | + $token['name'] === 'table')) { |
|
| 2851 | + /* Parse error. Act as if an end tag with the tag name "caption" |
|
| 2852 | 2852 | had been seen, then, if that token wasn't ignored, reprocess the |
| 2853 | 2853 | current token. */ |
| 2854 | - $this->inCaption(array( |
|
| 2855 | - 'name' => 'caption', |
|
| 2856 | - 'type' => HTML5::ENDTAG |
|
| 2857 | - )); |
|
| 2854 | + $this->inCaption(array( |
|
| 2855 | + 'name' => 'caption', |
|
| 2856 | + 'type' => HTML5::ENDTAG |
|
| 2857 | + )); |
|
| 2858 | 2858 | |
| 2859 | - return $this->inTable($token); |
|
| 2859 | + return $this->inTable($token); |
|
| 2860 | 2860 | |
| 2861 | - /* An end tag whose tag name is one of: "body", "col", "colgroup", |
|
| 2861 | + /* An end tag whose tag name is one of: "body", "col", "colgroup", |
|
| 2862 | 2862 | "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 2863 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2864 | - array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', |
|
| 2865 | - 'thead', 'tr'))) { |
|
| 2866 | - // Parse error. Ignore the token. |
|
| 2867 | - |
|
| 2868 | - /* Anything else */ |
|
| 2869 | - } else { |
|
| 2870 | - /* Process the token as if the insertion mode was "in body". */ |
|
| 2871 | - $this->inBody($token); |
|
| 2872 | - } |
|
| 2873 | - } |
|
| 2874 | - |
|
| 2875 | - private function inColumnGroup($token) { |
|
| 2876 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 2863 | + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2864 | + array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', |
|
| 2865 | + 'thead', 'tr'))) { |
|
| 2866 | + // Parse error. Ignore the token. |
|
| 2867 | + |
|
| 2868 | + /* Anything else */ |
|
| 2869 | + } else { |
|
| 2870 | + /* Process the token as if the insertion mode was "in body". */ |
|
| 2871 | + $this->inBody($token); |
|
| 2872 | + } |
|
| 2873 | + } |
|
| 2874 | + |
|
| 2875 | + private function inColumnGroup($token) { |
|
| 2876 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 2877 | 2877 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 2878 | 2878 | or U+0020 SPACE */ |
| 2879 | - if($token['type'] === HTML5::CHARACTR && |
|
| 2880 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 2881 | - /* Append the character to the current node. */ |
|
| 2882 | - $text = $this->dom->createTextNode($token['data']); |
|
| 2883 | - end($this->stack)->appendChild($text); |
|
| 2884 | - |
|
| 2885 | - /* A comment token */ |
|
| 2886 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 2887 | - /* Append a Comment node to the current node with the data |
|
| 2879 | + if($token['type'] === HTML5::CHARACTR && |
|
| 2880 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 2881 | + /* Append the character to the current node. */ |
|
| 2882 | + $text = $this->dom->createTextNode($token['data']); |
|
| 2883 | + end($this->stack)->appendChild($text); |
|
| 2884 | + |
|
| 2885 | + /* A comment token */ |
|
| 2886 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 2887 | + /* Append a Comment node to the current node with the data |
|
| 2888 | 2888 | attribute set to the data given in the comment token. */ |
| 2889 | - $comment = $this->dom->createComment($token['data']); |
|
| 2890 | - end($this->stack)->appendChild($comment); |
|
| 2889 | + $comment = $this->dom->createComment($token['data']); |
|
| 2890 | + end($this->stack)->appendChild($comment); |
|
| 2891 | 2891 | |
| 2892 | - /* A start tag whose tag name is "col" */ |
|
| 2893 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { |
|
| 2894 | - /* Insert a col element for the token. Immediately pop the current |
|
| 2892 | + /* A start tag whose tag name is "col" */ |
|
| 2893 | + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { |
|
| 2894 | + /* Insert a col element for the token. Immediately pop the current |
|
| 2895 | 2895 | node off the stack of open elements. */ |
| 2896 | - $this->insertElement($token); |
|
| 2897 | - array_pop($this->stack); |
|
| 2896 | + $this->insertElement($token); |
|
| 2897 | + array_pop($this->stack); |
|
| 2898 | 2898 | |
| 2899 | - /* An end tag whose tag name is "colgroup" */ |
|
| 2900 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2901 | - $token['name'] === 'colgroup') { |
|
| 2902 | - /* If the current node is the root html element, then this is a |
|
| 2899 | + /* An end tag whose tag name is "colgroup" */ |
|
| 2900 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2901 | + $token['name'] === 'colgroup') { |
|
| 2902 | + /* If the current node is the root html element, then this is a |
|
| 2903 | 2903 | parse error, ignore the token. (innerHTML case) */ |
| 2904 | - if(end($this->stack)->nodeName === 'html') { |
|
| 2905 | - // Ignore |
|
| 2904 | + if(end($this->stack)->nodeName === 'html') { |
|
| 2905 | + // Ignore |
|
| 2906 | 2906 | |
| 2907 | - /* Otherwise, pop the current node (which will be a colgroup |
|
| 2907 | + /* Otherwise, pop the current node (which will be a colgroup |
|
| 2908 | 2908 | element) from the stack of open elements. Switch the insertion |
| 2909 | 2909 | mode to "in table". */ |
| 2910 | - } else { |
|
| 2911 | - array_pop($this->stack); |
|
| 2912 | - $this->mode = self::IN_TABLE; |
|
| 2913 | - } |
|
| 2914 | - |
|
| 2915 | - /* An end tag whose tag name is "col" */ |
|
| 2916 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { |
|
| 2917 | - /* Parse error. Ignore the token. */ |
|
| 2918 | - |
|
| 2919 | - /* Anything else */ |
|
| 2920 | - } else { |
|
| 2921 | - /* Act as if an end tag with the tag name "colgroup" had been seen, |
|
| 2910 | + } else { |
|
| 2911 | + array_pop($this->stack); |
|
| 2912 | + $this->mode = self::IN_TABLE; |
|
| 2913 | + } |
|
| 2914 | + |
|
| 2915 | + /* An end tag whose tag name is "col" */ |
|
| 2916 | + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { |
|
| 2917 | + /* Parse error. Ignore the token. */ |
|
| 2918 | + |
|
| 2919 | + /* Anything else */ |
|
| 2920 | + } else { |
|
| 2921 | + /* Act as if an end tag with the tag name "colgroup" had been seen, |
|
| 2922 | 2922 | and then, if that token wasn't ignored, reprocess the current token. */ |
| 2923 | - $this->inColumnGroup(array( |
|
| 2924 | - 'name' => 'colgroup', |
|
| 2925 | - 'type' => HTML5::ENDTAG |
|
| 2926 | - )); |
|
| 2923 | + $this->inColumnGroup(array( |
|
| 2924 | + 'name' => 'colgroup', |
|
| 2925 | + 'type' => HTML5::ENDTAG |
|
| 2926 | + )); |
|
| 2927 | 2927 | |
| 2928 | - return $this->inTable($token); |
|
| 2929 | - } |
|
| 2930 | - } |
|
| 2928 | + return $this->inTable($token); |
|
| 2929 | + } |
|
| 2930 | + } |
|
| 2931 | 2931 | |
| 2932 | - private function inTableBody($token) { |
|
| 2933 | - $clear = array('tbody', 'tfoot', 'thead', 'html'); |
|
| 2932 | + private function inTableBody($token) { |
|
| 2933 | + $clear = array('tbody', 'tfoot', 'thead', 'html'); |
|
| 2934 | 2934 | |
| 2935 | - /* A start tag whose tag name is "tr" */ |
|
| 2936 | - if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { |
|
| 2937 | - /* Clear the stack back to a table body context. */ |
|
| 2938 | - $this->clearStackToTableContext($clear); |
|
| 2935 | + /* A start tag whose tag name is "tr" */ |
|
| 2936 | + if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { |
|
| 2937 | + /* Clear the stack back to a table body context. */ |
|
| 2938 | + $this->clearStackToTableContext($clear); |
|
| 2939 | 2939 | |
| 2940 | - /* Insert a tr element for the token, then switch the insertion |
|
| 2940 | + /* Insert a tr element for the token, then switch the insertion |
|
| 2941 | 2941 | mode to "in row". */ |
| 2942 | - $this->insertElement($token); |
|
| 2943 | - $this->mode = self::IN_ROW; |
|
| 2942 | + $this->insertElement($token); |
|
| 2943 | + $this->mode = self::IN_ROW; |
|
| 2944 | 2944 | |
| 2945 | - /* A start tag whose tag name is one of: "th", "td" */ |
|
| 2946 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2947 | - ($token['name'] === 'th' || $token['name'] === 'td')) { |
|
| 2948 | - /* Parse error. Act as if a start tag with the tag name "tr" had |
|
| 2945 | + /* A start tag whose tag name is one of: "th", "td" */ |
|
| 2946 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2947 | + ($token['name'] === 'th' || $token['name'] === 'td')) { |
|
| 2948 | + /* Parse error. Act as if a start tag with the tag name "tr" had |
|
| 2949 | 2949 | been seen, then reprocess the current token. */ |
| 2950 | - $this->inTableBody(array( |
|
| 2951 | - 'name' => 'tr', |
|
| 2952 | - 'type' => HTML5::STARTTAG, |
|
| 2953 | - 'attr' => array() |
|
| 2954 | - )); |
|
| 2955 | - |
|
| 2956 | - return $this->inRow($token); |
|
| 2957 | - |
|
| 2958 | - /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
|
| 2959 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2960 | - in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { |
|
| 2961 | - /* If the stack of open elements does not have an element in table |
|
| 2950 | + $this->inTableBody(array( |
|
| 2951 | + 'name' => 'tr', |
|
| 2952 | + 'type' => HTML5::STARTTAG, |
|
| 2953 | + 'attr' => array() |
|
| 2954 | + )); |
|
| 2955 | + |
|
| 2956 | + return $this->inRow($token); |
|
| 2957 | + |
|
| 2958 | + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
|
| 2959 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2960 | + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { |
|
| 2961 | + /* If the stack of open elements does not have an element in table |
|
| 2962 | 2962 | scope with the same tag name as the token, this is a parse error. |
| 2963 | 2963 | Ignore the token. */ |
| 2964 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 2965 | - // Ignore |
|
| 2964 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 2965 | + // Ignore |
|
| 2966 | 2966 | |
| 2967 | - /* Otherwise: */ |
|
| 2968 | - } else { |
|
| 2969 | - /* Clear the stack back to a table body context. */ |
|
| 2970 | - $this->clearStackToTableContext($clear); |
|
| 2967 | + /* Otherwise: */ |
|
| 2968 | + } else { |
|
| 2969 | + /* Clear the stack back to a table body context. */ |
|
| 2970 | + $this->clearStackToTableContext($clear); |
|
| 2971 | 2971 | |
| 2972 | - /* Pop the current node from the stack of open elements. Switch |
|
| 2972 | + /* Pop the current node from the stack of open elements. Switch |
|
| 2973 | 2973 | the insertion mode to "in table". */ |
| 2974 | - array_pop($this->stack); |
|
| 2975 | - $this->mode = self::IN_TABLE; |
|
| 2976 | - } |
|
| 2974 | + array_pop($this->stack); |
|
| 2975 | + $this->mode = self::IN_TABLE; |
|
| 2976 | + } |
|
| 2977 | 2977 | |
| 2978 | - /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 2978 | + /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 2979 | 2979 | "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ |
| 2980 | - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2981 | - array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || |
|
| 2982 | - ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { |
|
| 2983 | - /* If the stack of open elements does not have a tbody, thead, or |
|
| 2980 | + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2981 | + array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || |
|
| 2982 | + ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { |
|
| 2983 | + /* If the stack of open elements does not have a tbody, thead, or |
|
| 2984 | 2984 | tfoot element in table scope, this is a parse error. Ignore the |
| 2985 | 2985 | token. (innerHTML case) */ |
| 2986 | - if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { |
|
| 2987 | - // Ignore. |
|
| 2986 | + if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { |
|
| 2987 | + // Ignore. |
|
| 2988 | 2988 | |
| 2989 | - /* Otherwise: */ |
|
| 2990 | - } else { |
|
| 2991 | - /* Clear the stack back to a table body context. */ |
|
| 2992 | - $this->clearStackToTableContext($clear); |
|
| 2989 | + /* Otherwise: */ |
|
| 2990 | + } else { |
|
| 2991 | + /* Clear the stack back to a table body context. */ |
|
| 2992 | + $this->clearStackToTableContext($clear); |
|
| 2993 | 2993 | |
| 2994 | - /* Act as if an end tag with the same tag name as the current |
|
| 2994 | + /* Act as if an end tag with the same tag name as the current |
|
| 2995 | 2995 | node ("tbody", "tfoot", or "thead") had been seen, then |
| 2996 | 2996 | reprocess the current token. */ |
| 2997 | - $this->inTableBody(array( |
|
| 2998 | - 'name' => end($this->stack)->nodeName, |
|
| 2999 | - 'type' => HTML5::ENDTAG |
|
| 3000 | - )); |
|
| 2997 | + $this->inTableBody(array( |
|
| 2998 | + 'name' => end($this->stack)->nodeName, |
|
| 2999 | + 'type' => HTML5::ENDTAG |
|
| 3000 | + )); |
|
| 3001 | 3001 | |
| 3002 | - return $this->mainPhase($token); |
|
| 3003 | - } |
|
| 3002 | + return $this->mainPhase($token); |
|
| 3003 | + } |
|
| 3004 | 3004 | |
| 3005 | - /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 3005 | + /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 3006 | 3006 | "colgroup", "html", "td", "th", "tr" */ |
| 3007 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3008 | - array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { |
|
| 3009 | - /* Parse error. Ignore the token. */ |
|
| 3010 | - |
|
| 3011 | - /* Anything else */ |
|
| 3012 | - } else { |
|
| 3013 | - /* Process the token as if the insertion mode was "in table". */ |
|
| 3014 | - $this->inTable($token); |
|
| 3015 | - } |
|
| 3016 | - } |
|
| 3017 | - |
|
| 3018 | - private function inRow($token) { |
|
| 3019 | - $clear = array('tr', 'html'); |
|
| 3020 | - |
|
| 3021 | - /* A start tag whose tag name is one of: "th", "td" */ |
|
| 3022 | - if($token['type'] === HTML5::STARTTAG && |
|
| 3023 | - ($token['name'] === 'th' || $token['name'] === 'td')) { |
|
| 3024 | - /* Clear the stack back to a table row context. */ |
|
| 3025 | - $this->clearStackToTableContext($clear); |
|
| 3026 | - |
|
| 3027 | - /* Insert an HTML element for the token, then switch the insertion |
|
| 3007 | + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3008 | + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { |
|
| 3009 | + /* Parse error. Ignore the token. */ |
|
| 3010 | + |
|
| 3011 | + /* Anything else */ |
|
| 3012 | + } else { |
|
| 3013 | + /* Process the token as if the insertion mode was "in table". */ |
|
| 3014 | + $this->inTable($token); |
|
| 3015 | + } |
|
| 3016 | + } |
|
| 3017 | + |
|
| 3018 | + private function inRow($token) { |
|
| 3019 | + $clear = array('tr', 'html'); |
|
| 3020 | + |
|
| 3021 | + /* A start tag whose tag name is one of: "th", "td" */ |
|
| 3022 | + if($token['type'] === HTML5::STARTTAG && |
|
| 3023 | + ($token['name'] === 'th' || $token['name'] === 'td')) { |
|
| 3024 | + /* Clear the stack back to a table row context. */ |
|
| 3025 | + $this->clearStackToTableContext($clear); |
|
| 3026 | + |
|
| 3027 | + /* Insert an HTML element for the token, then switch the insertion |
|
| 3028 | 3028 | mode to "in cell". */ |
| 3029 | - $this->insertElement($token); |
|
| 3030 | - $this->mode = self::IN_CELL; |
|
| 3029 | + $this->insertElement($token); |
|
| 3030 | + $this->mode = self::IN_CELL; |
|
| 3031 | 3031 | |
| 3032 | - /* Insert a marker at the end of the list of active formatting |
|
| 3032 | + /* Insert a marker at the end of the list of active formatting |
|
| 3033 | 3033 | elements. */ |
| 3034 | - $this->a_formatting[] = self::MARKER; |
|
| 3034 | + $this->a_formatting[] = self::MARKER; |
|
| 3035 | 3035 | |
| 3036 | - /* An end tag whose tag name is "tr" */ |
|
| 3037 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { |
|
| 3038 | - /* If the stack of open elements does not have an element in table |
|
| 3036 | + /* An end tag whose tag name is "tr" */ |
|
| 3037 | + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { |
|
| 3038 | + /* If the stack of open elements does not have an element in table |
|
| 3039 | 3039 | scope with the same tag name as the token, this is a parse error. |
| 3040 | 3040 | Ignore the token. (innerHTML case) */ |
| 3041 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3042 | - // Ignore. |
|
| 3041 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 3042 | + // Ignore. |
|
| 3043 | 3043 | |
| 3044 | - /* Otherwise: */ |
|
| 3045 | - } else { |
|
| 3046 | - /* Clear the stack back to a table row context. */ |
|
| 3047 | - $this->clearStackToTableContext($clear); |
|
| 3044 | + /* Otherwise: */ |
|
| 3045 | + } else { |
|
| 3046 | + /* Clear the stack back to a table row context. */ |
|
| 3047 | + $this->clearStackToTableContext($clear); |
|
| 3048 | 3048 | |
| 3049 | - /* Pop the current node (which will be a tr element) from the |
|
| 3049 | + /* Pop the current node (which will be a tr element) from the |
|
| 3050 | 3050 | stack of open elements. Switch the insertion mode to "in table |
| 3051 | 3051 | body". */ |
| 3052 | - array_pop($this->stack); |
|
| 3053 | - $this->mode = self::IN_TBODY; |
|
| 3054 | - } |
|
| 3052 | + array_pop($this->stack); |
|
| 3053 | + $this->mode = self::IN_TBODY; |
|
| 3054 | + } |
|
| 3055 | 3055 | |
| 3056 | - /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 3056 | + /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 3057 | 3057 | "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ |
| 3058 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3059 | - array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { |
|
| 3060 | - /* Act as if an end tag with the tag name "tr" had been seen, then, |
|
| 3058 | + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3059 | + array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { |
|
| 3060 | + /* Act as if an end tag with the tag name "tr" had been seen, then, |
|
| 3061 | 3061 | if that token wasn't ignored, reprocess the current token. */ |
| 3062 | - $this->inRow(array( |
|
| 3063 | - 'name' => 'tr', |
|
| 3064 | - 'type' => HTML5::ENDTAG |
|
| 3065 | - )); |
|
| 3062 | + $this->inRow(array( |
|
| 3063 | + 'name' => 'tr', |
|
| 3064 | + 'type' => HTML5::ENDTAG |
|
| 3065 | + )); |
|
| 3066 | 3066 | |
| 3067 | - return $this->inCell($token); |
|
| 3067 | + return $this->inCell($token); |
|
| 3068 | 3068 | |
| 3069 | - /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
|
| 3070 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3071 | - in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { |
|
| 3072 | - /* If the stack of open elements does not have an element in table |
|
| 3069 | + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
|
| 3070 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3071 | + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { |
|
| 3072 | + /* If the stack of open elements does not have an element in table |
|
| 3073 | 3073 | scope with the same tag name as the token, this is a parse error. |
| 3074 | 3074 | Ignore the token. */ |
| 3075 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3076 | - // Ignore. |
|
| 3075 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 3076 | + // Ignore. |
|
| 3077 | 3077 | |
| 3078 | - /* Otherwise: */ |
|
| 3079 | - } else { |
|
| 3080 | - /* Otherwise, act as if an end tag with the tag name "tr" had |
|
| 3078 | + /* Otherwise: */ |
|
| 3079 | + } else { |
|
| 3080 | + /* Otherwise, act as if an end tag with the tag name "tr" had |
|
| 3081 | 3081 | been seen, then reprocess the current token. */ |
| 3082 | - $this->inRow(array( |
|
| 3083 | - 'name' => 'tr', |
|
| 3084 | - 'type' => HTML5::ENDTAG |
|
| 3085 | - )); |
|
| 3082 | + $this->inRow(array( |
|
| 3083 | + 'name' => 'tr', |
|
| 3084 | + 'type' => HTML5::ENDTAG |
|
| 3085 | + )); |
|
| 3086 | 3086 | |
| 3087 | - return $this->inCell($token); |
|
| 3088 | - } |
|
| 3087 | + return $this->inCell($token); |
|
| 3088 | + } |
|
| 3089 | 3089 | |
| 3090 | - /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 3090 | + /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 3091 | 3091 | "colgroup", "html", "td", "th" */ |
| 3092 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3093 | - array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { |
|
| 3094 | - /* Parse error. Ignore the token. */ |
|
| 3095 | - |
|
| 3096 | - /* Anything else */ |
|
| 3097 | - } else { |
|
| 3098 | - /* Process the token as if the insertion mode was "in table". */ |
|
| 3099 | - $this->inTable($token); |
|
| 3100 | - } |
|
| 3101 | - } |
|
| 3102 | - |
|
| 3103 | - private function inCell($token) { |
|
| 3104 | - /* An end tag whose tag name is one of: "td", "th" */ |
|
| 3105 | - if($token['type'] === HTML5::ENDTAG && |
|
| 3106 | - ($token['name'] === 'td' || $token['name'] === 'th')) { |
|
| 3107 | - /* If the stack of open elements does not have an element in table |
|
| 3092 | + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3093 | + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { |
|
| 3094 | + /* Parse error. Ignore the token. */ |
|
| 3095 | + |
|
| 3096 | + /* Anything else */ |
|
| 3097 | + } else { |
|
| 3098 | + /* Process the token as if the insertion mode was "in table". */ |
|
| 3099 | + $this->inTable($token); |
|
| 3100 | + } |
|
| 3101 | + } |
|
| 3102 | + |
|
| 3103 | + private function inCell($token) { |
|
| 3104 | + /* An end tag whose tag name is one of: "td", "th" */ |
|
| 3105 | + if($token['type'] === HTML5::ENDTAG && |
|
| 3106 | + ($token['name'] === 'td' || $token['name'] === 'th')) { |
|
| 3107 | + /* If the stack of open elements does not have an element in table |
|
| 3108 | 3108 | scope with the same tag name as that of the token, then this is a |
| 3109 | 3109 | parse error and the token must be ignored. */ |
| 3110 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3111 | - // Ignore. |
|
| 3110 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 3111 | + // Ignore. |
|
| 3112 | 3112 | |
| 3113 | - /* Otherwise: */ |
|
| 3114 | - } else { |
|
| 3115 | - /* Generate implied end tags, except for elements with the same |
|
| 3113 | + /* Otherwise: */ |
|
| 3114 | + } else { |
|
| 3115 | + /* Generate implied end tags, except for elements with the same |
|
| 3116 | 3116 | tag name as the token. */ |
| 3117 | - $this->generateImpliedEndTags(array($token['name'])); |
|
| 3117 | + $this->generateImpliedEndTags(array($token['name'])); |
|
| 3118 | 3118 | |
| 3119 | - /* Now, if the current node is not an element with the same tag |
|
| 3119 | + /* Now, if the current node is not an element with the same tag |
|
| 3120 | 3120 | name as the token, then this is a parse error. */ |
| 3121 | - // k |
|
| 3121 | + // k |
|
| 3122 | 3122 | |
| 3123 | - /* Pop elements from this stack until an element with the same |
|
| 3123 | + /* Pop elements from this stack until an element with the same |
|
| 3124 | 3124 | tag name as the token has been popped from the stack. */ |
| 3125 | - while(true) { |
|
| 3126 | - $node = end($this->stack)->nodeName; |
|
| 3127 | - array_pop($this->stack); |
|
| 3125 | + while(true) { |
|
| 3126 | + $node = end($this->stack)->nodeName; |
|
| 3127 | + array_pop($this->stack); |
|
| 3128 | 3128 | |
| 3129 | - if($node === $token['name']) { |
|
| 3130 | - break; |
|
| 3131 | - } |
|
| 3132 | - } |
|
| 3129 | + if($node === $token['name']) { |
|
| 3130 | + break; |
|
| 3131 | + } |
|
| 3132 | + } |
|
| 3133 | 3133 | |
| 3134 | - /* Clear the list of active formatting elements up to the last |
|
| 3134 | + /* Clear the list of active formatting elements up to the last |
|
| 3135 | 3135 | marker. */ |
| 3136 | - $this->clearTheActiveFormattingElementsUpToTheLastMarker(); |
|
| 3136 | + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); |
|
| 3137 | 3137 | |
| 3138 | - /* Switch the insertion mode to "in row". (The current node |
|
| 3138 | + /* Switch the insertion mode to "in row". (The current node |
|
| 3139 | 3139 | will be a tr element at this point.) */ |
| 3140 | - $this->mode = self::IN_ROW; |
|
| 3141 | - } |
|
| 3140 | + $this->mode = self::IN_ROW; |
|
| 3141 | + } |
|
| 3142 | 3142 | |
| 3143 | - /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 3143 | + /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 3144 | 3144 | "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 3145 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3146 | - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
|
| 3147 | - 'thead', 'tr'))) { |
|
| 3148 | - /* If the stack of open elements does not have a td or th element |
|
| 3145 | + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3146 | + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
|
| 3147 | + 'thead', 'tr'))) { |
|
| 3148 | + /* If the stack of open elements does not have a td or th element |
|
| 3149 | 3149 | in table scope, then this is a parse error; ignore the token. |
| 3150 | 3150 | (innerHTML case) */ |
| 3151 | - if(!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3152 | - // Ignore. |
|
| 3151 | + if(!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3152 | + // Ignore. |
|
| 3153 | 3153 | |
| 3154 | - /* Otherwise, close the cell (see below) and reprocess the current |
|
| 3154 | + /* Otherwise, close the cell (see below) and reprocess the current |
|
| 3155 | 3155 | token. */ |
| 3156 | - } else { |
|
| 3157 | - $this->closeCell(); |
|
| 3158 | - return $this->inRow($token); |
|
| 3159 | - } |
|
| 3156 | + } else { |
|
| 3157 | + $this->closeCell(); |
|
| 3158 | + return $this->inRow($token); |
|
| 3159 | + } |
|
| 3160 | 3160 | |
| 3161 | - /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 3161 | + /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
| 3162 | 3162 | "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 3163 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3164 | - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
|
| 3165 | - 'thead', 'tr'))) { |
|
| 3166 | - /* If the stack of open elements does not have a td or th element |
|
| 3163 | + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3164 | + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
|
| 3165 | + 'thead', 'tr'))) { |
|
| 3166 | + /* If the stack of open elements does not have a td or th element |
|
| 3167 | 3167 | in table scope, then this is a parse error; ignore the token. |
| 3168 | 3168 | (innerHTML case) */ |
| 3169 | - if(!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3170 | - // Ignore. |
|
| 3169 | + if(!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3170 | + // Ignore. |
|
| 3171 | 3171 | |
| 3172 | - /* Otherwise, close the cell (see below) and reprocess the current |
|
| 3172 | + /* Otherwise, close the cell (see below) and reprocess the current |
|
| 3173 | 3173 | token. */ |
| 3174 | - } else { |
|
| 3175 | - $this->closeCell(); |
|
| 3176 | - return $this->inRow($token); |
|
| 3177 | - } |
|
| 3174 | + } else { |
|
| 3175 | + $this->closeCell(); |
|
| 3176 | + return $this->inRow($token); |
|
| 3177 | + } |
|
| 3178 | 3178 | |
| 3179 | - /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 3179 | + /* An end tag whose tag name is one of: "body", "caption", "col", |
|
| 3180 | 3180 | "colgroup", "html" */ |
| 3181 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3182 | - array('body', 'caption', 'col', 'colgroup', 'html'))) { |
|
| 3183 | - /* Parse error. Ignore the token. */ |
|
| 3181 | + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3182 | + array('body', 'caption', 'col', 'colgroup', 'html'))) { |
|
| 3183 | + /* Parse error. Ignore the token. */ |
|
| 3184 | 3184 | |
| 3185 | - /* An end tag whose tag name is one of: "table", "tbody", "tfoot", |
|
| 3185 | + /* An end tag whose tag name is one of: "table", "tbody", "tfoot", |
|
| 3186 | 3186 | "thead", "tr" */ |
| 3187 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3188 | - array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { |
|
| 3189 | - /* If the stack of open elements does not have an element in table |
|
| 3187 | + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3188 | + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { |
|
| 3189 | + /* If the stack of open elements does not have an element in table |
|
| 3190 | 3190 | scope with the same tag name as that of the token (which can only |
| 3191 | 3191 | happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), |
| 3192 | 3192 | then this is a parse error and the token must be ignored. */ |
| 3193 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3194 | - // Ignore. |
|
| 3193 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 3194 | + // Ignore. |
|
| 3195 | 3195 | |
| 3196 | - /* Otherwise, close the cell (see below) and reprocess the current |
|
| 3196 | + /* Otherwise, close the cell (see below) and reprocess the current |
|
| 3197 | 3197 | token. */ |
| 3198 | - } else { |
|
| 3199 | - $this->closeCell(); |
|
| 3200 | - return $this->inRow($token); |
|
| 3201 | - } |
|
| 3202 | - |
|
| 3203 | - /* Anything else */ |
|
| 3204 | - } else { |
|
| 3205 | - /* Process the token as if the insertion mode was "in body". */ |
|
| 3206 | - $this->inBody($token); |
|
| 3207 | - } |
|
| 3208 | - } |
|
| 3209 | - |
|
| 3210 | - private function inSelect($token) { |
|
| 3211 | - /* Handle the token as follows: */ |
|
| 3212 | - |
|
| 3213 | - /* A character token */ |
|
| 3214 | - if($token['type'] === HTML5::CHARACTR) { |
|
| 3215 | - /* Append the token's character to the current node. */ |
|
| 3216 | - $this->insertText($token['data']); |
|
| 3217 | - |
|
| 3218 | - /* A comment token */ |
|
| 3219 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3220 | - /* Append a Comment node to the current node with the data |
|
| 3198 | + } else { |
|
| 3199 | + $this->closeCell(); |
|
| 3200 | + return $this->inRow($token); |
|
| 3201 | + } |
|
| 3202 | + |
|
| 3203 | + /* Anything else */ |
|
| 3204 | + } else { |
|
| 3205 | + /* Process the token as if the insertion mode was "in body". */ |
|
| 3206 | + $this->inBody($token); |
|
| 3207 | + } |
|
| 3208 | + } |
|
| 3209 | + |
|
| 3210 | + private function inSelect($token) { |
|
| 3211 | + /* Handle the token as follows: */ |
|
| 3212 | + |
|
| 3213 | + /* A character token */ |
|
| 3214 | + if($token['type'] === HTML5::CHARACTR) { |
|
| 3215 | + /* Append the token's character to the current node. */ |
|
| 3216 | + $this->insertText($token['data']); |
|
| 3217 | + |
|
| 3218 | + /* A comment token */ |
|
| 3219 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3220 | + /* Append a Comment node to the current node with the data |
|
| 3221 | 3221 | attribute set to the data given in the comment token. */ |
| 3222 | - $this->insertComment($token['data']); |
|
| 3222 | + $this->insertComment($token['data']); |
|
| 3223 | 3223 | |
| 3224 | - /* A start tag token whose tag name is "option" */ |
|
| 3225 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 3226 | - $token['name'] === 'option') { |
|
| 3227 | - /* If the current node is an option element, act as if an end tag |
|
| 3224 | + /* A start tag token whose tag name is "option" */ |
|
| 3225 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 3226 | + $token['name'] === 'option') { |
|
| 3227 | + /* If the current node is an option element, act as if an end tag |
|
| 3228 | 3228 | with the tag name "option" had been seen. */ |
| 3229 | - if(end($this->stack)->nodeName === 'option') { |
|
| 3230 | - $this->inSelect(array( |
|
| 3231 | - 'name' => 'option', |
|
| 3232 | - 'type' => HTML5::ENDTAG |
|
| 3233 | - )); |
|
| 3234 | - } |
|
| 3235 | - |
|
| 3236 | - /* Insert an HTML element for the token. */ |
|
| 3237 | - $this->insertElement($token); |
|
| 3238 | - |
|
| 3239 | - /* A start tag token whose tag name is "optgroup" */ |
|
| 3240 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 3241 | - $token['name'] === 'optgroup') { |
|
| 3242 | - /* If the current node is an option element, act as if an end tag |
|
| 3229 | + if(end($this->stack)->nodeName === 'option') { |
|
| 3230 | + $this->inSelect(array( |
|
| 3231 | + 'name' => 'option', |
|
| 3232 | + 'type' => HTML5::ENDTAG |
|
| 3233 | + )); |
|
| 3234 | + } |
|
| 3235 | + |
|
| 3236 | + /* Insert an HTML element for the token. */ |
|
| 3237 | + $this->insertElement($token); |
|
| 3238 | + |
|
| 3239 | + /* A start tag token whose tag name is "optgroup" */ |
|
| 3240 | + } elseif($token['type'] === HTML5::STARTTAG && |
|
| 3241 | + $token['name'] === 'optgroup') { |
|
| 3242 | + /* If the current node is an option element, act as if an end tag |
|
| 3243 | 3243 | with the tag name "option" had been seen. */ |
| 3244 | - if(end($this->stack)->nodeName === 'option') { |
|
| 3245 | - $this->inSelect(array( |
|
| 3246 | - 'name' => 'option', |
|
| 3247 | - 'type' => HTML5::ENDTAG |
|
| 3248 | - )); |
|
| 3249 | - } |
|
| 3250 | - |
|
| 3251 | - /* If the current node is an optgroup element, act as if an end tag |
|
| 3244 | + if(end($this->stack)->nodeName === 'option') { |
|
| 3245 | + $this->inSelect(array( |
|
| 3246 | + 'name' => 'option', |
|
| 3247 | + 'type' => HTML5::ENDTAG |
|
| 3248 | + )); |
|
| 3249 | + } |
|
| 3250 | + |
|
| 3251 | + /* If the current node is an optgroup element, act as if an end tag |
|
| 3252 | 3252 | with the tag name "optgroup" had been seen. */ |
| 3253 | - if(end($this->stack)->nodeName === 'optgroup') { |
|
| 3254 | - $this->inSelect(array( |
|
| 3255 | - 'name' => 'optgroup', |
|
| 3256 | - 'type' => HTML5::ENDTAG |
|
| 3257 | - )); |
|
| 3258 | - } |
|
| 3259 | - |
|
| 3260 | - /* Insert an HTML element for the token. */ |
|
| 3261 | - $this->insertElement($token); |
|
| 3262 | - |
|
| 3263 | - /* An end tag token whose tag name is "optgroup" */ |
|
| 3264 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3265 | - $token['name'] === 'optgroup') { |
|
| 3266 | - /* First, if the current node is an option element, and the node |
|
| 3253 | + if(end($this->stack)->nodeName === 'optgroup') { |
|
| 3254 | + $this->inSelect(array( |
|
| 3255 | + 'name' => 'optgroup', |
|
| 3256 | + 'type' => HTML5::ENDTAG |
|
| 3257 | + )); |
|
| 3258 | + } |
|
| 3259 | + |
|
| 3260 | + /* Insert an HTML element for the token. */ |
|
| 3261 | + $this->insertElement($token); |
|
| 3262 | + |
|
| 3263 | + /* An end tag token whose tag name is "optgroup" */ |
|
| 3264 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3265 | + $token['name'] === 'optgroup') { |
|
| 3266 | + /* First, if the current node is an option element, and the node |
|
| 3267 | 3267 | immediately before it in the stack of open elements is an optgroup |
| 3268 | 3268 | element, then act as if an end tag with the tag name "option" had |
| 3269 | 3269 | been seen. */ |
| 3270 | - $elements_in_stack = count($this->stack); |
|
| 3270 | + $elements_in_stack = count($this->stack); |
|
| 3271 | 3271 | |
| 3272 | - if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && |
|
| 3273 | - $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { |
|
| 3274 | - $this->inSelect(array( |
|
| 3275 | - 'name' => 'option', |
|
| 3276 | - 'type' => HTML5::ENDTAG |
|
| 3277 | - )); |
|
| 3278 | - } |
|
| 3272 | + if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && |
|
| 3273 | + $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { |
|
| 3274 | + $this->inSelect(array( |
|
| 3275 | + 'name' => 'option', |
|
| 3276 | + 'type' => HTML5::ENDTAG |
|
| 3277 | + )); |
|
| 3278 | + } |
|
| 3279 | 3279 | |
| 3280 | - /* If the current node is an optgroup element, then pop that node |
|
| 3280 | + /* If the current node is an optgroup element, then pop that node |
|
| 3281 | 3281 | from the stack of open elements. Otherwise, this is a parse error, |
| 3282 | 3282 | ignore the token. */ |
| 3283 | - if($this->stack[$elements_in_stack - 1] === 'optgroup') { |
|
| 3284 | - array_pop($this->stack); |
|
| 3285 | - } |
|
| 3286 | - |
|
| 3287 | - /* An end tag token whose tag name is "option" */ |
|
| 3288 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3289 | - $token['name'] === 'option') { |
|
| 3290 | - /* If the current node is an option element, then pop that node |
|
| 3283 | + if($this->stack[$elements_in_stack - 1] === 'optgroup') { |
|
| 3284 | + array_pop($this->stack); |
|
| 3285 | + } |
|
| 3286 | + |
|
| 3287 | + /* An end tag token whose tag name is "option" */ |
|
| 3288 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3289 | + $token['name'] === 'option') { |
|
| 3290 | + /* If the current node is an option element, then pop that node |
|
| 3291 | 3291 | from the stack of open elements. Otherwise, this is a parse error, |
| 3292 | 3292 | ignore the token. */ |
| 3293 | - if(end($this->stack)->nodeName === 'option') { |
|
| 3294 | - array_pop($this->stack); |
|
| 3295 | - } |
|
| 3296 | - |
|
| 3297 | - /* An end tag whose tag name is "select" */ |
|
| 3298 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3299 | - $token['name'] === 'select') { |
|
| 3300 | - /* If the stack of open elements does not have an element in table |
|
| 3293 | + if(end($this->stack)->nodeName === 'option') { |
|
| 3294 | + array_pop($this->stack); |
|
| 3295 | + } |
|
| 3296 | + |
|
| 3297 | + /* An end tag whose tag name is "select" */ |
|
| 3298 | + } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3299 | + $token['name'] === 'select') { |
|
| 3300 | + /* If the stack of open elements does not have an element in table |
|
| 3301 | 3301 | scope with the same tag name as the token, this is a parse error. |
| 3302 | 3302 | Ignore the token. (innerHTML case) */ |
| 3303 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3304 | - // w/e |
|
| 3303 | + if(!$this->elementInScope($token['name'], true)) { |
|
| 3304 | + // w/e |
|
| 3305 | 3305 | |
| 3306 | - /* Otherwise: */ |
|
| 3307 | - } else { |
|
| 3308 | - /* Pop elements from the stack of open elements until a select |
|
| 3306 | + /* Otherwise: */ |
|
| 3307 | + } else { |
|
| 3308 | + /* Pop elements from the stack of open elements until a select |
|
| 3309 | 3309 | element has been popped from the stack. */ |
| 3310 | - while(true) { |
|
| 3311 | - $current = end($this->stack)->nodeName; |
|
| 3312 | - array_pop($this->stack); |
|
| 3313 | - |
|
| 3314 | - if($current === 'select') { |
|
| 3315 | - break; |
|
| 3316 | - } |
|
| 3317 | - } |
|
| 3318 | - |
|
| 3319 | - /* Reset the insertion mode appropriately. */ |
|
| 3320 | - $this->resetInsertionMode(); |
|
| 3321 | - } |
|
| 3322 | - |
|
| 3323 | - /* A start tag whose tag name is "select" */ |
|
| 3324 | - } elseif($token['name'] === 'select' && |
|
| 3325 | - $token['type'] === HTML5::STARTTAG) { |
|
| 3326 | - /* Parse error. Act as if the token had been an end tag with the |
|
| 3310 | + while(true) { |
|
| 3311 | + $current = end($this->stack)->nodeName; |
|
| 3312 | + array_pop($this->stack); |
|
| 3313 | + |
|
| 3314 | + if($current === 'select') { |
|
| 3315 | + break; |
|
| 3316 | + } |
|
| 3317 | + } |
|
| 3318 | + |
|
| 3319 | + /* Reset the insertion mode appropriately. */ |
|
| 3320 | + $this->resetInsertionMode(); |
|
| 3321 | + } |
|
| 3322 | + |
|
| 3323 | + /* A start tag whose tag name is "select" */ |
|
| 3324 | + } elseif($token['name'] === 'select' && |
|
| 3325 | + $token['type'] === HTML5::STARTTAG) { |
|
| 3326 | + /* Parse error. Act as if the token had been an end tag with the |
|
| 3327 | 3327 | tag name "select" instead. */ |
| 3328 | - $this->inSelect(array( |
|
| 3329 | - 'name' => 'select', |
|
| 3330 | - 'type' => HTML5::ENDTAG |
|
| 3331 | - )); |
|
| 3328 | + $this->inSelect(array( |
|
| 3329 | + 'name' => 'select', |
|
| 3330 | + 'type' => HTML5::ENDTAG |
|
| 3331 | + )); |
|
| 3332 | 3332 | |
| 3333 | - /* An end tag whose tag name is one of: "caption", "table", "tbody", |
|
| 3333 | + /* An end tag whose tag name is one of: "caption", "table", "tbody", |
|
| 3334 | 3334 | "tfoot", "thead", "tr", "td", "th" */ |
| 3335 | - } elseif(in_array($token['name'], array('caption', 'table', 'tbody', |
|
| 3336 | - 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { |
|
| 3337 | - /* Parse error. */ |
|
| 3338 | - // w/e |
|
| 3335 | + } elseif(in_array($token['name'], array('caption', 'table', 'tbody', |
|
| 3336 | + 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { |
|
| 3337 | + /* Parse error. */ |
|
| 3338 | + // w/e |
|
| 3339 | 3339 | |
| 3340 | - /* If the stack of open elements has an element in table scope with |
|
| 3340 | + /* If the stack of open elements has an element in table scope with |
|
| 3341 | 3341 | the same tag name as that of the token, then act as if an end tag |
| 3342 | 3342 | with the tag name "select" had been seen, and reprocess the token. |
| 3343 | 3343 | Otherwise, ignore the token. */ |
| 3344 | - if($this->elementInScope($token['name'], true)) { |
|
| 3345 | - $this->inSelect(array( |
|
| 3346 | - 'name' => 'select', |
|
| 3347 | - 'type' => HTML5::ENDTAG |
|
| 3348 | - )); |
|
| 3344 | + if($this->elementInScope($token['name'], true)) { |
|
| 3345 | + $this->inSelect(array( |
|
| 3346 | + 'name' => 'select', |
|
| 3347 | + 'type' => HTML5::ENDTAG |
|
| 3348 | + )); |
|
| 3349 | 3349 | |
| 3350 | - $this->mainPhase($token); |
|
| 3351 | - } |
|
| 3350 | + $this->mainPhase($token); |
|
| 3351 | + } |
|
| 3352 | 3352 | |
| 3353 | - /* Anything else */ |
|
| 3354 | - } else { |
|
| 3355 | - /* Parse error. Ignore the token. */ |
|
| 3356 | - } |
|
| 3357 | - } |
|
| 3353 | + /* Anything else */ |
|
| 3354 | + } else { |
|
| 3355 | + /* Parse error. Ignore the token. */ |
|
| 3356 | + } |
|
| 3357 | + } |
|
| 3358 | 3358 | |
| 3359 | - private function afterBody($token) { |
|
| 3360 | - /* Handle the token as follows: */ |
|
| 3359 | + private function afterBody($token) { |
|
| 3360 | + /* Handle the token as follows: */ |
|
| 3361 | 3361 | |
| 3362 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3362 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3363 | 3363 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3364 | 3364 | or U+0020 SPACE */ |
| 3365 | - if($token['type'] === HTML5::CHARACTR && |
|
| 3366 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3367 | - /* Process the token as it would be processed if the insertion mode |
|
| 3365 | + if($token['type'] === HTML5::CHARACTR && |
|
| 3366 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3367 | + /* Process the token as it would be processed if the insertion mode |
|
| 3368 | 3368 | was "in body". */ |
| 3369 | - $this->inBody($token); |
|
| 3369 | + $this->inBody($token); |
|
| 3370 | 3370 | |
| 3371 | - /* A comment token */ |
|
| 3372 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3373 | - /* Append a Comment node to the first element in the stack of open |
|
| 3371 | + /* A comment token */ |
|
| 3372 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3373 | + /* Append a Comment node to the first element in the stack of open |
|
| 3374 | 3374 | elements (the html element), with the data attribute set to the |
| 3375 | 3375 | data given in the comment token. */ |
| 3376 | - $comment = $this->dom->createComment($token['data']); |
|
| 3377 | - $this->stack[0]->appendChild($comment); |
|
| 3376 | + $comment = $this->dom->createComment($token['data']); |
|
| 3377 | + $this->stack[0]->appendChild($comment); |
|
| 3378 | 3378 | |
| 3379 | - /* An end tag with the tag name "html" */ |
|
| 3380 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { |
|
| 3381 | - /* If the parser was originally created in order to handle the |
|
| 3379 | + /* An end tag with the tag name "html" */ |
|
| 3380 | + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { |
|
| 3381 | + /* If the parser was originally created in order to handle the |
|
| 3382 | 3382 | setting of an element's innerHTML attribute, this is a parse error; |
| 3383 | 3383 | ignore the token. (The element will be an html element in this |
| 3384 | 3384 | case.) (innerHTML case) */ |
| 3385 | 3385 | |
| 3386 | - /* Otherwise, switch to the trailing end phase. */ |
|
| 3387 | - $this->phase = self::END_PHASE; |
|
| 3386 | + /* Otherwise, switch to the trailing end phase. */ |
|
| 3387 | + $this->phase = self::END_PHASE; |
|
| 3388 | 3388 | |
| 3389 | - /* Anything else */ |
|
| 3390 | - } else { |
|
| 3391 | - /* Parse error. Set the insertion mode to "in body" and reprocess |
|
| 3389 | + /* Anything else */ |
|
| 3390 | + } else { |
|
| 3391 | + /* Parse error. Set the insertion mode to "in body" and reprocess |
|
| 3392 | 3392 | the token. */ |
| 3393 | - $this->mode = self::IN_BODY; |
|
| 3394 | - return $this->inBody($token); |
|
| 3395 | - } |
|
| 3396 | - } |
|
| 3393 | + $this->mode = self::IN_BODY; |
|
| 3394 | + return $this->inBody($token); |
|
| 3395 | + } |
|
| 3396 | + } |
|
| 3397 | 3397 | |
| 3398 | - private function inFrameset($token) { |
|
| 3399 | - /* Handle the token as follows: */ |
|
| 3398 | + private function inFrameset($token) { |
|
| 3399 | + /* Handle the token as follows: */ |
|
| 3400 | 3400 | |
| 3401 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3401 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3402 | 3402 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3403 | 3403 | U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ |
| 3404 | - if($token['type'] === HTML5::CHARACTR && |
|
| 3405 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3406 | - /* Append the character to the current node. */ |
|
| 3407 | - $this->insertText($token['data']); |
|
| 3408 | - |
|
| 3409 | - /* A comment token */ |
|
| 3410 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3411 | - /* Append a Comment node to the current node with the data |
|
| 3404 | + if($token['type'] === HTML5::CHARACTR && |
|
| 3405 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3406 | + /* Append the character to the current node. */ |
|
| 3407 | + $this->insertText($token['data']); |
|
| 3408 | + |
|
| 3409 | + /* A comment token */ |
|
| 3410 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3411 | + /* Append a Comment node to the current node with the data |
|
| 3412 | 3412 | attribute set to the data given in the comment token. */ |
| 3413 | - $this->insertComment($token['data']); |
|
| 3413 | + $this->insertComment($token['data']); |
|
| 3414 | 3414 | |
| 3415 | - /* A start tag with the tag name "frameset" */ |
|
| 3416 | - } elseif($token['name'] === 'frameset' && |
|
| 3417 | - $token['type'] === HTML5::STARTTAG) { |
|
| 3418 | - $this->insertElement($token); |
|
| 3415 | + /* A start tag with the tag name "frameset" */ |
|
| 3416 | + } elseif($token['name'] === 'frameset' && |
|
| 3417 | + $token['type'] === HTML5::STARTTAG) { |
|
| 3418 | + $this->insertElement($token); |
|
| 3419 | 3419 | |
| 3420 | - /* An end tag with the tag name "frameset" */ |
|
| 3421 | - } elseif($token['name'] === 'frameset' && |
|
| 3422 | - $token['type'] === HTML5::ENDTAG) { |
|
| 3423 | - /* If the current node is the root html element, then this is a |
|
| 3420 | + /* An end tag with the tag name "frameset" */ |
|
| 3421 | + } elseif($token['name'] === 'frameset' && |
|
| 3422 | + $token['type'] === HTML5::ENDTAG) { |
|
| 3423 | + /* If the current node is the root html element, then this is a |
|
| 3424 | 3424 | parse error; ignore the token. (innerHTML case) */ |
| 3425 | - if(end($this->stack)->nodeName === 'html') { |
|
| 3426 | - // Ignore |
|
| 3425 | + if(end($this->stack)->nodeName === 'html') { |
|
| 3426 | + // Ignore |
|
| 3427 | 3427 | |
| 3428 | - } else { |
|
| 3429 | - /* Otherwise, pop the current node from the stack of open |
|
| 3428 | + } else { |
|
| 3429 | + /* Otherwise, pop the current node from the stack of open |
|
| 3430 | 3430 | elements. */ |
| 3431 | - array_pop($this->stack); |
|
| 3431 | + array_pop($this->stack); |
|
| 3432 | 3432 | |
| 3433 | - /* If the parser was not originally created in order to handle |
|
| 3433 | + /* If the parser was not originally created in order to handle |
|
| 3434 | 3434 | the setting of an element's innerHTML attribute (innerHTML case), |
| 3435 | 3435 | and the current node is no longer a frameset element, then change |
| 3436 | 3436 | the insertion mode to "after frameset". */ |
| 3437 | - $this->mode = self::AFTR_FRAME; |
|
| 3438 | - } |
|
| 3439 | - |
|
| 3440 | - /* A start tag with the tag name "frame" */ |
|
| 3441 | - } elseif($token['name'] === 'frame' && |
|
| 3442 | - $token['type'] === HTML5::STARTTAG) { |
|
| 3443 | - /* Insert an HTML element for the token. */ |
|
| 3444 | - $this->insertElement($token); |
|
| 3445 | - |
|
| 3446 | - /* Immediately pop the current node off the stack of open elements. */ |
|
| 3447 | - array_pop($this->stack); |
|
| 3448 | - |
|
| 3449 | - /* A start tag with the tag name "noframes" */ |
|
| 3450 | - } elseif($token['name'] === 'noframes' && |
|
| 3451 | - $token['type'] === HTML5::STARTTAG) { |
|
| 3452 | - /* Process the token as if the insertion mode had been "in body". */ |
|
| 3453 | - $this->inBody($token); |
|
| 3454 | - |
|
| 3455 | - /* Anything else */ |
|
| 3456 | - } else { |
|
| 3457 | - /* Parse error. Ignore the token. */ |
|
| 3458 | - } |
|
| 3459 | - } |
|
| 3460 | - |
|
| 3461 | - private function afterFrameset($token) { |
|
| 3462 | - /* Handle the token as follows: */ |
|
| 3463 | - |
|
| 3464 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3437 | + $this->mode = self::AFTR_FRAME; |
|
| 3438 | + } |
|
| 3439 | + |
|
| 3440 | + /* A start tag with the tag name "frame" */ |
|
| 3441 | + } elseif($token['name'] === 'frame' && |
|
| 3442 | + $token['type'] === HTML5::STARTTAG) { |
|
| 3443 | + /* Insert an HTML element for the token. */ |
|
| 3444 | + $this->insertElement($token); |
|
| 3445 | + |
|
| 3446 | + /* Immediately pop the current node off the stack of open elements. */ |
|
| 3447 | + array_pop($this->stack); |
|
| 3448 | + |
|
| 3449 | + /* A start tag with the tag name "noframes" */ |
|
| 3450 | + } elseif($token['name'] === 'noframes' && |
|
| 3451 | + $token['type'] === HTML5::STARTTAG) { |
|
| 3452 | + /* Process the token as if the insertion mode had been "in body". */ |
|
| 3453 | + $this->inBody($token); |
|
| 3454 | + |
|
| 3455 | + /* Anything else */ |
|
| 3456 | + } else { |
|
| 3457 | + /* Parse error. Ignore the token. */ |
|
| 3458 | + } |
|
| 3459 | + } |
|
| 3460 | + |
|
| 3461 | + private function afterFrameset($token) { |
|
| 3462 | + /* Handle the token as follows: */ |
|
| 3463 | + |
|
| 3464 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3465 | 3465 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3466 | 3466 | U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ |
| 3467 | - if($token['type'] === HTML5::CHARACTR && |
|
| 3468 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3469 | - /* Append the character to the current node. */ |
|
| 3470 | - $this->insertText($token['data']); |
|
| 3471 | - |
|
| 3472 | - /* A comment token */ |
|
| 3473 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3474 | - /* Append a Comment node to the current node with the data |
|
| 3467 | + if($token['type'] === HTML5::CHARACTR && |
|
| 3468 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3469 | + /* Append the character to the current node. */ |
|
| 3470 | + $this->insertText($token['data']); |
|
| 3471 | + |
|
| 3472 | + /* A comment token */ |
|
| 3473 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3474 | + /* Append a Comment node to the current node with the data |
|
| 3475 | 3475 | attribute set to the data given in the comment token. */ |
| 3476 | - $this->insertComment($token['data']); |
|
| 3477 | - |
|
| 3478 | - /* An end tag with the tag name "html" */ |
|
| 3479 | - } elseif($token['name'] === 'html' && |
|
| 3480 | - $token['type'] === HTML5::ENDTAG) { |
|
| 3481 | - /* Switch to the trailing end phase. */ |
|
| 3482 | - $this->phase = self::END_PHASE; |
|
| 3483 | - |
|
| 3484 | - /* A start tag with the tag name "noframes" */ |
|
| 3485 | - } elseif($token['name'] === 'noframes' && |
|
| 3486 | - $token['type'] === HTML5::STARTTAG) { |
|
| 3487 | - /* Process the token as if the insertion mode had been "in body". */ |
|
| 3488 | - $this->inBody($token); |
|
| 3489 | - |
|
| 3490 | - /* Anything else */ |
|
| 3491 | - } else { |
|
| 3492 | - /* Parse error. Ignore the token. */ |
|
| 3493 | - } |
|
| 3494 | - } |
|
| 3495 | - |
|
| 3496 | - private function trailingEndPhase($token) { |
|
| 3497 | - /* After the main phase, as each token is emitted from the tokenisation |
|
| 3476 | + $this->insertComment($token['data']); |
|
| 3477 | + |
|
| 3478 | + /* An end tag with the tag name "html" */ |
|
| 3479 | + } elseif($token['name'] === 'html' && |
|
| 3480 | + $token['type'] === HTML5::ENDTAG) { |
|
| 3481 | + /* Switch to the trailing end phase. */ |
|
| 3482 | + $this->phase = self::END_PHASE; |
|
| 3483 | + |
|
| 3484 | + /* A start tag with the tag name "noframes" */ |
|
| 3485 | + } elseif($token['name'] === 'noframes' && |
|
| 3486 | + $token['type'] === HTML5::STARTTAG) { |
|
| 3487 | + /* Process the token as if the insertion mode had been "in body". */ |
|
| 3488 | + $this->inBody($token); |
|
| 3489 | + |
|
| 3490 | + /* Anything else */ |
|
| 3491 | + } else { |
|
| 3492 | + /* Parse error. Ignore the token. */ |
|
| 3493 | + } |
|
| 3494 | + } |
|
| 3495 | + |
|
| 3496 | + private function trailingEndPhase($token) { |
|
| 3497 | + /* After the main phase, as each token is emitted from the tokenisation |
|
| 3498 | 3498 | stage, it must be processed as described in this section. */ |
| 3499 | 3499 | |
| 3500 | - /* A DOCTYPE token */ |
|
| 3501 | - if($token['type'] === HTML5::DOCTYPE) { |
|
| 3502 | - // Parse error. Ignore the token. |
|
| 3500 | + /* A DOCTYPE token */ |
|
| 3501 | + if($token['type'] === HTML5::DOCTYPE) { |
|
| 3502 | + // Parse error. Ignore the token. |
|
| 3503 | 3503 | |
| 3504 | - /* A comment token */ |
|
| 3505 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3506 | - /* Append a Comment node to the Document object with the data |
|
| 3504 | + /* A comment token */ |
|
| 3505 | + } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3506 | + /* Append a Comment node to the Document object with the data |
|
| 3507 | 3507 | attribute set to the data given in the comment token. */ |
| 3508 | - $comment = $this->dom->createComment($token['data']); |
|
| 3509 | - $this->dom->appendChild($comment); |
|
| 3508 | + $comment = $this->dom->createComment($token['data']); |
|
| 3509 | + $this->dom->appendChild($comment); |
|
| 3510 | 3510 | |
| 3511 | - /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3511 | + /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
|
| 3512 | 3512 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3513 | 3513 | or U+0020 SPACE */ |
| 3514 | - } elseif($token['type'] === HTML5::CHARACTR && |
|
| 3515 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3516 | - /* Process the token as it would be processed in the main phase. */ |
|
| 3517 | - $this->mainPhase($token); |
|
| 3514 | + } elseif($token['type'] === HTML5::CHARACTR && |
|
| 3515 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
|
| 3516 | + /* Process the token as it would be processed in the main phase. */ |
|
| 3517 | + $this->mainPhase($token); |
|
| 3518 | 3518 | |
| 3519 | - /* A character token that is not one of U+0009 CHARACTER TABULATION, |
|
| 3519 | + /* A character token that is not one of U+0009 CHARACTER TABULATION, |
|
| 3520 | 3520 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3521 | 3521 | or U+0020 SPACE. Or a start tag token. Or an end tag token. */ |
| 3522 | - } elseif(($token['type'] === HTML5::CHARACTR && |
|
| 3523 | - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || |
|
| 3524 | - $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { |
|
| 3525 | - /* Parse error. Switch back to the main phase and reprocess the |
|
| 3522 | + } elseif(($token['type'] === HTML5::CHARACTR && |
|
| 3523 | + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || |
|
| 3524 | + $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { |
|
| 3525 | + /* Parse error. Switch back to the main phase and reprocess the |
|
| 3526 | 3526 | token. */ |
| 3527 | - $this->phase = self::MAIN_PHASE; |
|
| 3528 | - return $this->mainPhase($token); |
|
| 3529 | - |
|
| 3530 | - /* An end-of-file token */ |
|
| 3531 | - } elseif($token['type'] === HTML5::EOF) { |
|
| 3532 | - /* OMG DONE!! */ |
|
| 3533 | - } |
|
| 3534 | - } |
|
| 3535 | - |
|
| 3536 | - private function insertElement($token, $append = true, $check = false) { |
|
| 3537 | - // Proprietary workaround for libxml2's limitations with tag names |
|
| 3538 | - if ($check) { |
|
| 3539 | - // Slightly modified HTML5 tag-name modification, |
|
| 3540 | - // removing anything that's not an ASCII letter, digit, or hyphen |
|
| 3541 | - $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); |
|
| 3542 | - // Remove leading hyphens and numbers |
|
| 3543 | - $token['name'] = ltrim($token['name'], '-0..9'); |
|
| 3544 | - // In theory, this should ever be needed, but just in case |
|
| 3545 | - if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice |
|
| 3546 | - } |
|
| 3527 | + $this->phase = self::MAIN_PHASE; |
|
| 3528 | + return $this->mainPhase($token); |
|
| 3529 | + |
|
| 3530 | + /* An end-of-file token */ |
|
| 3531 | + } elseif($token['type'] === HTML5::EOF) { |
|
| 3532 | + /* OMG DONE!! */ |
|
| 3533 | + } |
|
| 3534 | + } |
|
| 3535 | + |
|
| 3536 | + private function insertElement($token, $append = true, $check = false) { |
|
| 3537 | + // Proprietary workaround for libxml2's limitations with tag names |
|
| 3538 | + if ($check) { |
|
| 3539 | + // Slightly modified HTML5 tag-name modification, |
|
| 3540 | + // removing anything that's not an ASCII letter, digit, or hyphen |
|
| 3541 | + $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); |
|
| 3542 | + // Remove leading hyphens and numbers |
|
| 3543 | + $token['name'] = ltrim($token['name'], '-0..9'); |
|
| 3544 | + // In theory, this should ever be needed, but just in case |
|
| 3545 | + if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice |
|
| 3546 | + } |
|
| 3547 | 3547 | |
| 3548 | - $el = $this->dom->createElement($token['name']); |
|
| 3548 | + $el = $this->dom->createElement($token['name']); |
|
| 3549 | 3549 | |
| 3550 | - foreach($token['attr'] as $attr) { |
|
| 3551 | - if(!$el->hasAttribute($attr['name'])) { |
|
| 3552 | - $el->setAttribute($attr['name'], $attr['value']); |
|
| 3553 | - } |
|
| 3554 | - } |
|
| 3550 | + foreach($token['attr'] as $attr) { |
|
| 3551 | + if(!$el->hasAttribute($attr['name'])) { |
|
| 3552 | + $el->setAttribute($attr['name'], $attr['value']); |
|
| 3553 | + } |
|
| 3554 | + } |
|
| 3555 | 3555 | |
| 3556 | - $this->appendToRealParent($el); |
|
| 3557 | - $this->stack[] = $el; |
|
| 3556 | + $this->appendToRealParent($el); |
|
| 3557 | + $this->stack[] = $el; |
|
| 3558 | 3558 | |
| 3559 | - return $el; |
|
| 3560 | - } |
|
| 3559 | + return $el; |
|
| 3560 | + } |
|
| 3561 | 3561 | |
| 3562 | - private function insertText($data) { |
|
| 3563 | - $text = $this->dom->createTextNode($data); |
|
| 3564 | - $this->appendToRealParent($text); |
|
| 3565 | - } |
|
| 3562 | + private function insertText($data) { |
|
| 3563 | + $text = $this->dom->createTextNode($data); |
|
| 3564 | + $this->appendToRealParent($text); |
|
| 3565 | + } |
|
| 3566 | 3566 | |
| 3567 | - private function insertComment($data) { |
|
| 3568 | - $comment = $this->dom->createComment($data); |
|
| 3569 | - $this->appendToRealParent($comment); |
|
| 3570 | - } |
|
| 3567 | + private function insertComment($data) { |
|
| 3568 | + $comment = $this->dom->createComment($data); |
|
| 3569 | + $this->appendToRealParent($comment); |
|
| 3570 | + } |
|
| 3571 | 3571 | |
| 3572 | - private function appendToRealParent($node) { |
|
| 3573 | - if($this->foster_parent === null) { |
|
| 3574 | - end($this->stack)->appendChild($node); |
|
| 3572 | + private function appendToRealParent($node) { |
|
| 3573 | + if($this->foster_parent === null) { |
|
| 3574 | + end($this->stack)->appendChild($node); |
|
| 3575 | 3575 | |
| 3576 | - } elseif($this->foster_parent !== null) { |
|
| 3577 | - /* If the foster parent element is the parent element of the |
|
| 3576 | + } elseif($this->foster_parent !== null) { |
|
| 3577 | + /* If the foster parent element is the parent element of the |
|
| 3578 | 3578 | last table element in the stack of open elements, then the new |
| 3579 | 3579 | node must be inserted immediately before the last table element |
| 3580 | 3580 | in the stack of open elements in the foster parent element; |
| 3581 | 3581 | otherwise, the new node must be appended to the foster parent |
| 3582 | 3582 | element. */ |
| 3583 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 3584 | - if($this->stack[$n]->nodeName === 'table' && |
|
| 3585 | - $this->stack[$n]->parentNode !== null) { |
|
| 3586 | - $table = $this->stack[$n]; |
|
| 3587 | - break; |
|
| 3588 | - } |
|
| 3589 | - } |
|
| 3590 | - |
|
| 3591 | - if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) |
|
| 3592 | - $this->foster_parent->insertBefore($node, $table); |
|
| 3593 | - else |
|
| 3594 | - $this->foster_parent->appendChild($node); |
|
| 3595 | - |
|
| 3596 | - $this->foster_parent = null; |
|
| 3597 | - } |
|
| 3598 | - } |
|
| 3599 | - |
|
| 3600 | - private function elementInScope($el, $table = false) { |
|
| 3601 | - if(is_array($el)) { |
|
| 3602 | - foreach($el as $element) { |
|
| 3603 | - if($this->elementInScope($element, $table)) { |
|
| 3604 | - return true; |
|
| 3605 | - } |
|
| 3606 | - } |
|
| 3607 | - |
|
| 3608 | - return false; |
|
| 3609 | - } |
|
| 3610 | - |
|
| 3611 | - $leng = count($this->stack); |
|
| 3612 | - |
|
| 3613 | - for($n = 0; $n < $leng; $n++) { |
|
| 3614 | - /* 1. Initialise node to be the current node (the bottommost node of |
|
| 3583 | + for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 3584 | + if($this->stack[$n]->nodeName === 'table' && |
|
| 3585 | + $this->stack[$n]->parentNode !== null) { |
|
| 3586 | + $table = $this->stack[$n]; |
|
| 3587 | + break; |
|
| 3588 | + } |
|
| 3589 | + } |
|
| 3590 | + |
|
| 3591 | + if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) |
|
| 3592 | + $this->foster_parent->insertBefore($node, $table); |
|
| 3593 | + else |
|
| 3594 | + $this->foster_parent->appendChild($node); |
|
| 3595 | + |
|
| 3596 | + $this->foster_parent = null; |
|
| 3597 | + } |
|
| 3598 | + } |
|
| 3599 | + |
|
| 3600 | + private function elementInScope($el, $table = false) { |
|
| 3601 | + if(is_array($el)) { |
|
| 3602 | + foreach($el as $element) { |
|
| 3603 | + if($this->elementInScope($element, $table)) { |
|
| 3604 | + return true; |
|
| 3605 | + } |
|
| 3606 | + } |
|
| 3607 | + |
|
| 3608 | + return false; |
|
| 3609 | + } |
|
| 3610 | + |
|
| 3611 | + $leng = count($this->stack); |
|
| 3612 | + |
|
| 3613 | + for($n = 0; $n < $leng; $n++) { |
|
| 3614 | + /* 1. Initialise node to be the current node (the bottommost node of |
|
| 3615 | 3615 | the stack). */ |
| 3616 | - $node = $this->stack[$leng - 1 - $n]; |
|
| 3616 | + $node = $this->stack[$leng - 1 - $n]; |
|
| 3617 | 3617 | |
| 3618 | - if($node->tagName === $el) { |
|
| 3619 | - /* 2. If node is the target node, terminate in a match state. */ |
|
| 3620 | - return true; |
|
| 3618 | + if($node->tagName === $el) { |
|
| 3619 | + /* 2. If node is the target node, terminate in a match state. */ |
|
| 3620 | + return true; |
|
| 3621 | 3621 | |
| 3622 | - } elseif($node->tagName === 'table') { |
|
| 3623 | - /* 3. Otherwise, if node is a table element, terminate in a failure |
|
| 3622 | + } elseif($node->tagName === 'table') { |
|
| 3623 | + /* 3. Otherwise, if node is a table element, terminate in a failure |
|
| 3624 | 3624 | state. */ |
| 3625 | - return false; |
|
| 3625 | + return false; |
|
| 3626 | 3626 | |
| 3627 | - } elseif($table === true && in_array($node->tagName, array('caption', 'td', |
|
| 3628 | - 'th', 'button', 'marquee', 'object'))) { |
|
| 3629 | - /* 4. Otherwise, if the algorithm is the "has an element in scope" |
|
| 3627 | + } elseif($table === true && in_array($node->tagName, array('caption', 'td', |
|
| 3628 | + 'th', 'button', 'marquee', 'object'))) { |
|
| 3629 | + /* 4. Otherwise, if the algorithm is the "has an element in scope" |
|
| 3630 | 3630 | variant (rather than the "has an element in table scope" variant), |
| 3631 | 3631 | and node is one of the following, terminate in a failure state. */ |
| 3632 | - return false; |
|
| 3632 | + return false; |
|
| 3633 | 3633 | |
| 3634 | - } elseif($node === $node->ownerDocument->documentElement) { |
|
| 3635 | - /* 5. Otherwise, if node is an html element (root element), terminate |
|
| 3634 | + } elseif($node === $node->ownerDocument->documentElement) { |
|
| 3635 | + /* 5. Otherwise, if node is an html element (root element), terminate |
|
| 3636 | 3636 | in a failure state. (This can only happen if the node is the topmost |
| 3637 | 3637 | node of the stack of open elements, and prevents the next step from |
| 3638 | 3638 | being invoked if there are no more elements in the stack.) */ |
| 3639 | - return false; |
|
| 3640 | - } |
|
| 3639 | + return false; |
|
| 3640 | + } |
|
| 3641 | 3641 | |
| 3642 | - /* Otherwise, set node to the previous entry in the stack of open |
|
| 3642 | + /* Otherwise, set node to the previous entry in the stack of open |
|
| 3643 | 3643 | elements and return to step 2. (This will never fail, since the loop |
| 3644 | 3644 | will always terminate in the previous step if the top of the stack |
| 3645 | 3645 | is reached.) */ |
| 3646 | - } |
|
| 3647 | - } |
|
| 3646 | + } |
|
| 3647 | + } |
|
| 3648 | 3648 | |
| 3649 | - private function reconstructActiveFormattingElements() { |
|
| 3650 | - /* 1. If there are no entries in the list of active formatting elements, |
|
| 3649 | + private function reconstructActiveFormattingElements() { |
|
| 3650 | + /* 1. If there are no entries in the list of active formatting elements, |
|
| 3651 | 3651 | then there is nothing to reconstruct; stop this algorithm. */ |
| 3652 | - $formatting_elements = count($this->a_formatting); |
|
| 3652 | + $formatting_elements = count($this->a_formatting); |
|
| 3653 | 3653 | |
| 3654 | - if($formatting_elements === 0) { |
|
| 3655 | - return false; |
|
| 3656 | - } |
|
| 3654 | + if($formatting_elements === 0) { |
|
| 3655 | + return false; |
|
| 3656 | + } |
|
| 3657 | 3657 | |
| 3658 | - /* 3. Let entry be the last (most recently added) element in the list |
|
| 3658 | + /* 3. Let entry be the last (most recently added) element in the list |
|
| 3659 | 3659 | of active formatting elements. */ |
| 3660 | - $entry = end($this->a_formatting); |
|
| 3660 | + $entry = end($this->a_formatting); |
|
| 3661 | 3661 | |
| 3662 | - /* 2. If the last (most recently added) entry in the list of active |
|
| 3662 | + /* 2. If the last (most recently added) entry in the list of active |
|
| 3663 | 3663 | formatting elements is a marker, or if it is an element that is in the |
| 3664 | 3664 | stack of open elements, then there is nothing to reconstruct; stop this |
| 3665 | 3665 | algorithm. */ |
| 3666 | - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3667 | - return false; |
|
| 3668 | - } |
|
| 3666 | + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3667 | + return false; |
|
| 3668 | + } |
|
| 3669 | 3669 | |
| 3670 | - for($a = $formatting_elements - 1; $a >= 0; true) { |
|
| 3671 | - /* 4. If there are no entries before entry in the list of active |
|
| 3670 | + for($a = $formatting_elements - 1; $a >= 0; true) { |
|
| 3671 | + /* 4. If there are no entries before entry in the list of active |
|
| 3672 | 3672 | formatting elements, then jump to step 8. */ |
| 3673 | - if($a === 0) { |
|
| 3674 | - $step_seven = false; |
|
| 3675 | - break; |
|
| 3676 | - } |
|
| 3673 | + if($a === 0) { |
|
| 3674 | + $step_seven = false; |
|
| 3675 | + break; |
|
| 3676 | + } |
|
| 3677 | 3677 | |
| 3678 | - /* 5. Let entry be the entry one earlier than entry in the list of |
|
| 3678 | + /* 5. Let entry be the entry one earlier than entry in the list of |
|
| 3679 | 3679 | active formatting elements. */ |
| 3680 | - $a--; |
|
| 3681 | - $entry = $this->a_formatting[$a]; |
|
| 3680 | + $a--; |
|
| 3681 | + $entry = $this->a_formatting[$a]; |
|
| 3682 | 3682 | |
| 3683 | - /* 6. If entry is neither a marker nor an element that is also in |
|
| 3683 | + /* 6. If entry is neither a marker nor an element that is also in |
|
| 3684 | 3684 | thetack of open elements, go to step 4. */ |
| 3685 | - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3686 | - break; |
|
| 3687 | - } |
|
| 3688 | - } |
|
| 3685 | + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3686 | + break; |
|
| 3687 | + } |
|
| 3688 | + } |
|
| 3689 | 3689 | |
| 3690 | - while(true) { |
|
| 3691 | - /* 7. Let entry be the element one later than entry in the list of |
|
| 3690 | + while(true) { |
|
| 3691 | + /* 7. Let entry be the element one later than entry in the list of |
|
| 3692 | 3692 | active formatting elements. */ |
| 3693 | - if(isset($step_seven) && $step_seven === true) { |
|
| 3694 | - $a++; |
|
| 3695 | - $entry = $this->a_formatting[$a]; |
|
| 3696 | - } |
|
| 3693 | + if(isset($step_seven) && $step_seven === true) { |
|
| 3694 | + $a++; |
|
| 3695 | + $entry = $this->a_formatting[$a]; |
|
| 3696 | + } |
|
| 3697 | 3697 | |
| 3698 | - /* 8. Perform a shallow clone of the element entry to obtain clone. */ |
|
| 3699 | - $clone = $entry->cloneNode(); |
|
| 3698 | + /* 8. Perform a shallow clone of the element entry to obtain clone. */ |
|
| 3699 | + $clone = $entry->cloneNode(); |
|
| 3700 | 3700 | |
| 3701 | - /* 9. Append clone to the current node and push it onto the stack |
|
| 3701 | + /* 9. Append clone to the current node and push it onto the stack |
|
| 3702 | 3702 | of open elements so that it is the new current node. */ |
| 3703 | - end($this->stack)->appendChild($clone); |
|
| 3704 | - $this->stack[] = $clone; |
|
| 3703 | + end($this->stack)->appendChild($clone); |
|
| 3704 | + $this->stack[] = $clone; |
|
| 3705 | 3705 | |
| 3706 | - /* 10. Replace the entry for entry in the list with an entry for |
|
| 3706 | + /* 10. Replace the entry for entry in the list with an entry for |
|
| 3707 | 3707 | clone. */ |
| 3708 | - $this->a_formatting[$a] = $clone; |
|
| 3708 | + $this->a_formatting[$a] = $clone; |
|
| 3709 | 3709 | |
| 3710 | - /* 11. If the entry for clone in the list of active formatting |
|
| 3710 | + /* 11. If the entry for clone in the list of active formatting |
|
| 3711 | 3711 | elements is not the last entry in the list, return to step 7. */ |
| 3712 | - if(end($this->a_formatting) !== $clone) { |
|
| 3713 | - $step_seven = true; |
|
| 3714 | - } else { |
|
| 3715 | - break; |
|
| 3716 | - } |
|
| 3717 | - } |
|
| 3718 | - } |
|
| 3719 | - |
|
| 3720 | - private function clearTheActiveFormattingElementsUpToTheLastMarker() { |
|
| 3721 | - /* When the steps below require the UA to clear the list of active |
|
| 3712 | + if(end($this->a_formatting) !== $clone) { |
|
| 3713 | + $step_seven = true; |
|
| 3714 | + } else { |
|
| 3715 | + break; |
|
| 3716 | + } |
|
| 3717 | + } |
|
| 3718 | + } |
|
| 3719 | + |
|
| 3720 | + private function clearTheActiveFormattingElementsUpToTheLastMarker() { |
|
| 3721 | + /* When the steps below require the UA to clear the list of active |
|
| 3722 | 3722 | formatting elements up to the last marker, the UA must perform the |
| 3723 | 3723 | following steps: */ |
| 3724 | 3724 | |
| 3725 | - while(true) { |
|
| 3726 | - /* 1. Let entry be the last (most recently added) entry in the list |
|
| 3725 | + while(true) { |
|
| 3726 | + /* 1. Let entry be the last (most recently added) entry in the list |
|
| 3727 | 3727 | of active formatting elements. */ |
| 3728 | - $entry = end($this->a_formatting); |
|
| 3728 | + $entry = end($this->a_formatting); |
|
| 3729 | 3729 | |
| 3730 | - /* 2. Remove entry from the list of active formatting elements. */ |
|
| 3731 | - array_pop($this->a_formatting); |
|
| 3730 | + /* 2. Remove entry from the list of active formatting elements. */ |
|
| 3731 | + array_pop($this->a_formatting); |
|
| 3732 | 3732 | |
| 3733 | - /* 3. If entry was a marker, then stop the algorithm at this point. |
|
| 3733 | + /* 3. If entry was a marker, then stop the algorithm at this point. |
|
| 3734 | 3734 | The list has been cleared up to the last marker. */ |
| 3735 | - if($entry === self::MARKER) { |
|
| 3736 | - break; |
|
| 3737 | - } |
|
| 3738 | - } |
|
| 3739 | - } |
|
| 3740 | - |
|
| 3741 | - private function generateImpliedEndTags($exclude = array()) { |
|
| 3742 | - /* When the steps below require the UA to generate implied end tags, |
|
| 3735 | + if($entry === self::MARKER) { |
|
| 3736 | + break; |
|
| 3737 | + } |
|
| 3738 | + } |
|
| 3739 | + } |
|
| 3740 | + |
|
| 3741 | + private function generateImpliedEndTags($exclude = array()) { |
|
| 3742 | + /* When the steps below require the UA to generate implied end tags, |
|
| 3743 | 3743 | then, if the current node is a dd element, a dt element, an li element, |
| 3744 | 3744 | a p element, a td element, a th element, or a tr element, the UA must |
| 3745 | 3745 | act as if an end tag with the respective tag name had been seen and |
| 3746 | 3746 | then generate implied end tags again. */ |
| 3747 | - $node = end($this->stack); |
|
| 3748 | - $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); |
|
| 3747 | + $node = end($this->stack); |
|
| 3748 | + $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); |
|
| 3749 | 3749 | |
| 3750 | - while(in_array(end($this->stack)->nodeName, $elements)) { |
|
| 3751 | - array_pop($this->stack); |
|
| 3752 | - } |
|
| 3753 | - } |
|
| 3750 | + while(in_array(end($this->stack)->nodeName, $elements)) { |
|
| 3751 | + array_pop($this->stack); |
|
| 3752 | + } |
|
| 3753 | + } |
|
| 3754 | 3754 | |
| 3755 | - private function getElementCategory($node) { |
|
| 3756 | - $name = $node->tagName; |
|
| 3757 | - if(in_array($name, $this->special)) |
|
| 3758 | - return self::SPECIAL; |
|
| 3755 | + private function getElementCategory($node) { |
|
| 3756 | + $name = $node->tagName; |
|
| 3757 | + if(in_array($name, $this->special)) |
|
| 3758 | + return self::SPECIAL; |
|
| 3759 | 3759 | |
| 3760 | - elseif(in_array($name, $this->scoping)) |
|
| 3761 | - return self::SCOPING; |
|
| 3760 | + elseif(in_array($name, $this->scoping)) |
|
| 3761 | + return self::SCOPING; |
|
| 3762 | 3762 | |
| 3763 | - elseif(in_array($name, $this->formatting)) |
|
| 3764 | - return self::FORMATTING; |
|
| 3763 | + elseif(in_array($name, $this->formatting)) |
|
| 3764 | + return self::FORMATTING; |
|
| 3765 | 3765 | |
| 3766 | - else |
|
| 3767 | - return self::PHRASING; |
|
| 3768 | - } |
|
| 3766 | + else |
|
| 3767 | + return self::PHRASING; |
|
| 3768 | + } |
|
| 3769 | 3769 | |
| 3770 | - private function clearStackToTableContext($elements) { |
|
| 3771 | - /* When the steps above require the UA to clear the stack back to a |
|
| 3770 | + private function clearStackToTableContext($elements) { |
|
| 3771 | + /* When the steps above require the UA to clear the stack back to a |
|
| 3772 | 3772 | table context, it means that the UA must, while the current node is not |
| 3773 | 3773 | a table element or an html element, pop elements from the stack of open |
| 3774 | 3774 | elements. If this causes any elements to be popped from the stack, then |
| 3775 | 3775 | this is a parse error. */ |
| 3776 | - while(true) { |
|
| 3777 | - $node = end($this->stack)->nodeName; |
|
| 3778 | - |
|
| 3779 | - if(in_array($node, $elements)) { |
|
| 3780 | - break; |
|
| 3781 | - } else { |
|
| 3782 | - array_pop($this->stack); |
|
| 3783 | - } |
|
| 3784 | - } |
|
| 3785 | - } |
|
| 3786 | - |
|
| 3787 | - private function resetInsertionMode() { |
|
| 3788 | - /* 1. Let last be false. */ |
|
| 3789 | - $last = false; |
|
| 3790 | - $leng = count($this->stack); |
|
| 3791 | - |
|
| 3792 | - for($n = $leng - 1; $n >= 0; $n--) { |
|
| 3793 | - /* 2. Let node be the last node in the stack of open elements. */ |
|
| 3794 | - $node = $this->stack[$n]; |
|
| 3795 | - |
|
| 3796 | - /* 3. If node is the first node in the stack of open elements, then |
|
| 3776 | + while(true) { |
|
| 3777 | + $node = end($this->stack)->nodeName; |
|
| 3778 | + |
|
| 3779 | + if(in_array($node, $elements)) { |
|
| 3780 | + break; |
|
| 3781 | + } else { |
|
| 3782 | + array_pop($this->stack); |
|
| 3783 | + } |
|
| 3784 | + } |
|
| 3785 | + } |
|
| 3786 | + |
|
| 3787 | + private function resetInsertionMode() { |
|
| 3788 | + /* 1. Let last be false. */ |
|
| 3789 | + $last = false; |
|
| 3790 | + $leng = count($this->stack); |
|
| 3791 | + |
|
| 3792 | + for($n = $leng - 1; $n >= 0; $n--) { |
|
| 3793 | + /* 2. Let node be the last node in the stack of open elements. */ |
|
| 3794 | + $node = $this->stack[$n]; |
|
| 3795 | + |
|
| 3796 | + /* 3. If node is the first node in the stack of open elements, then |
|
| 3797 | 3797 | set last to true. If the element whose innerHTML attribute is being |
| 3798 | 3798 | set is neither a td element nor a th element, then set node to the |
| 3799 | 3799 | element whose innerHTML attribute is being set. (innerHTML case) */ |
| 3800 | - if($this->stack[0]->isSameNode($node)) { |
|
| 3801 | - $last = true; |
|
| 3802 | - } |
|
| 3800 | + if($this->stack[0]->isSameNode($node)) { |
|
| 3801 | + $last = true; |
|
| 3802 | + } |
|
| 3803 | 3803 | |
| 3804 | - /* 4. If node is a select element, then switch the insertion mode to |
|
| 3804 | + /* 4. If node is a select element, then switch the insertion mode to |
|
| 3805 | 3805 | "in select" and abort these steps. (innerHTML case) */ |
| 3806 | - if($node->nodeName === 'select') { |
|
| 3807 | - $this->mode = self::IN_SELECT; |
|
| 3808 | - break; |
|
| 3806 | + if($node->nodeName === 'select') { |
|
| 3807 | + $this->mode = self::IN_SELECT; |
|
| 3808 | + break; |
|
| 3809 | 3809 | |
| 3810 | - /* 5. If node is a td or th element, then switch the insertion mode |
|
| 3810 | + /* 5. If node is a td or th element, then switch the insertion mode |
|
| 3811 | 3811 | to "in cell" and abort these steps. */ |
| 3812 | - } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { |
|
| 3813 | - $this->mode = self::IN_CELL; |
|
| 3814 | - break; |
|
| 3812 | + } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { |
|
| 3813 | + $this->mode = self::IN_CELL; |
|
| 3814 | + break; |
|
| 3815 | 3815 | |
| 3816 | - /* 6. If node is a tr element, then switch the insertion mode to |
|
| 3816 | + /* 6. If node is a tr element, then switch the insertion mode to |
|
| 3817 | 3817 | "in row" and abort these steps. */ |
| 3818 | - } elseif($node->nodeName === 'tr') { |
|
| 3819 | - $this->mode = self::IN_ROW; |
|
| 3820 | - break; |
|
| 3818 | + } elseif($node->nodeName === 'tr') { |
|
| 3819 | + $this->mode = self::IN_ROW; |
|
| 3820 | + break; |
|
| 3821 | 3821 | |
| 3822 | - /* 7. If node is a tbody, thead, or tfoot element, then switch the |
|
| 3822 | + /* 7. If node is a tbody, thead, or tfoot element, then switch the |
|
| 3823 | 3823 | insertion mode to "in table body" and abort these steps. */ |
| 3824 | - } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { |
|
| 3825 | - $this->mode = self::IN_TBODY; |
|
| 3826 | - break; |
|
| 3824 | + } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { |
|
| 3825 | + $this->mode = self::IN_TBODY; |
|
| 3826 | + break; |
|
| 3827 | 3827 | |
| 3828 | - /* 8. If node is a caption element, then switch the insertion mode |
|
| 3828 | + /* 8. If node is a caption element, then switch the insertion mode |
|
| 3829 | 3829 | to "in caption" and abort these steps. */ |
| 3830 | - } elseif($node->nodeName === 'caption') { |
|
| 3831 | - $this->mode = self::IN_CAPTION; |
|
| 3832 | - break; |
|
| 3830 | + } elseif($node->nodeName === 'caption') { |
|
| 3831 | + $this->mode = self::IN_CAPTION; |
|
| 3832 | + break; |
|
| 3833 | 3833 | |
| 3834 | - /* 9. If node is a colgroup element, then switch the insertion mode |
|
| 3834 | + /* 9. If node is a colgroup element, then switch the insertion mode |
|
| 3835 | 3835 | to "in column group" and abort these steps. (innerHTML case) */ |
| 3836 | - } elseif($node->nodeName === 'colgroup') { |
|
| 3837 | - $this->mode = self::IN_CGROUP; |
|
| 3838 | - break; |
|
| 3836 | + } elseif($node->nodeName === 'colgroup') { |
|
| 3837 | + $this->mode = self::IN_CGROUP; |
|
| 3838 | + break; |
|
| 3839 | 3839 | |
| 3840 | - /* 10. If node is a table element, then switch the insertion mode |
|
| 3840 | + /* 10. If node is a table element, then switch the insertion mode |
|
| 3841 | 3841 | to "in table" and abort these steps. */ |
| 3842 | - } elseif($node->nodeName === 'table') { |
|
| 3843 | - $this->mode = self::IN_TABLE; |
|
| 3844 | - break; |
|
| 3842 | + } elseif($node->nodeName === 'table') { |
|
| 3843 | + $this->mode = self::IN_TABLE; |
|
| 3844 | + break; |
|
| 3845 | 3845 | |
| 3846 | - /* 11. If node is a head element, then switch the insertion mode |
|
| 3846 | + /* 11. If node is a head element, then switch the insertion mode |
|
| 3847 | 3847 | to "in body" ("in body"! not "in head"!) and abort these steps. |
| 3848 | 3848 | (innerHTML case) */ |
| 3849 | - } elseif($node->nodeName === 'head') { |
|
| 3850 | - $this->mode = self::IN_BODY; |
|
| 3851 | - break; |
|
| 3849 | + } elseif($node->nodeName === 'head') { |
|
| 3850 | + $this->mode = self::IN_BODY; |
|
| 3851 | + break; |
|
| 3852 | 3852 | |
| 3853 | - /* 12. If node is a body element, then switch the insertion mode to |
|
| 3853 | + /* 12. If node is a body element, then switch the insertion mode to |
|
| 3854 | 3854 | "in body" and abort these steps. */ |
| 3855 | - } elseif($node->nodeName === 'body') { |
|
| 3856 | - $this->mode = self::IN_BODY; |
|
| 3857 | - break; |
|
| 3855 | + } elseif($node->nodeName === 'body') { |
|
| 3856 | + $this->mode = self::IN_BODY; |
|
| 3857 | + break; |
|
| 3858 | 3858 | |
| 3859 | - /* 13. If node is a frameset element, then switch the insertion |
|
| 3859 | + /* 13. If node is a frameset element, then switch the insertion |
|
| 3860 | 3860 | mode to "in frameset" and abort these steps. (innerHTML case) */ |
| 3861 | - } elseif($node->nodeName === 'frameset') { |
|
| 3862 | - $this->mode = self::IN_FRAME; |
|
| 3863 | - break; |
|
| 3861 | + } elseif($node->nodeName === 'frameset') { |
|
| 3862 | + $this->mode = self::IN_FRAME; |
|
| 3863 | + break; |
|
| 3864 | 3864 | |
| 3865 | - /* 14. If node is an html element, then: if the head element |
|
| 3865 | + /* 14. If node is an html element, then: if the head element |
|
| 3866 | 3866 | pointer is null, switch the insertion mode to "before head", |
| 3867 | 3867 | otherwise, switch the insertion mode to "after head". In either |
| 3868 | 3868 | case, abort these steps. (innerHTML case) */ |
| 3869 | - } elseif($node->nodeName === 'html') { |
|
| 3870 | - $this->mode = ($this->head_pointer === null) |
|
| 3871 | - ? self::BEFOR_HEAD |
|
| 3872 | - : self::AFTER_HEAD; |
|
| 3869 | + } elseif($node->nodeName === 'html') { |
|
| 3870 | + $this->mode = ($this->head_pointer === null) |
|
| 3871 | + ? self::BEFOR_HEAD |
|
| 3872 | + : self::AFTER_HEAD; |
|
| 3873 | 3873 | |
| 3874 | - break; |
|
| 3874 | + break; |
|
| 3875 | 3875 | |
| 3876 | - /* 15. If last is true, then set the insertion mode to "in body" |
|
| 3876 | + /* 15. If last is true, then set the insertion mode to "in body" |
|
| 3877 | 3877 | and abort these steps. (innerHTML case) */ |
| 3878 | - } elseif($last) { |
|
| 3879 | - $this->mode = self::IN_BODY; |
|
| 3880 | - break; |
|
| 3881 | - } |
|
| 3882 | - } |
|
| 3883 | - } |
|
| 3884 | - |
|
| 3885 | - private function closeCell() { |
|
| 3886 | - /* If the stack of open elements has a td or th element in table scope, |
|
| 3878 | + } elseif($last) { |
|
| 3879 | + $this->mode = self::IN_BODY; |
|
| 3880 | + break; |
|
| 3881 | + } |
|
| 3882 | + } |
|
| 3883 | + } |
|
| 3884 | + |
|
| 3885 | + private function closeCell() { |
|
| 3886 | + /* If the stack of open elements has a td or th element in table scope, |
|
| 3887 | 3887 | then act as if an end tag token with that tag name had been seen. */ |
| 3888 | - foreach(array('td', 'th') as $cell) { |
|
| 3889 | - if($this->elementInScope($cell, true)) { |
|
| 3890 | - $this->inCell(array( |
|
| 3891 | - 'name' => $cell, |
|
| 3892 | - 'type' => HTML5::ENDTAG |
|
| 3893 | - )); |
|
| 3894 | - |
|
| 3895 | - break; |
|
| 3896 | - } |
|
| 3897 | - } |
|
| 3898 | - } |
|
| 3899 | - |
|
| 3900 | - public function save() { |
|
| 3901 | - return $this->dom; |
|
| 3902 | - } |
|
| 3888 | + foreach(array('td', 'th') as $cell) { |
|
| 3889 | + if($this->elementInScope($cell, true)) { |
|
| 3890 | + $this->inCell(array( |
|
| 3891 | + 'name' => $cell, |
|
| 3892 | + 'type' => HTML5::ENDTAG |
|
| 3893 | + )); |
|
| 3894 | + |
|
| 3895 | + break; |
|
| 3896 | + } |
|
| 3897 | + } |
|
| 3898 | + } |
|
| 3899 | + |
|
| 3900 | + public function save() { |
|
| 3901 | + return $this->dom; |
|
| 3902 | + } |
|
| 3903 | 3903 | } |
| 3904 | 3904 | ?> |
@@ -3542,7 +3542,10 @@ discard block |
||
| 3542 | 3542 | // Remove leading hyphens and numbers |
| 3543 | 3543 | $token['name'] = ltrim($token['name'], '-0..9'); |
| 3544 | 3544 | // In theory, this should ever be needed, but just in case |
| 3545 | - if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice |
|
| 3545 | + if ($token['name'] === '') { |
|
| 3546 | + $token['name'] = 'span'; |
|
| 3547 | + } |
|
| 3548 | + // arbitrary generic choice |
|
| 3546 | 3549 | } |
| 3547 | 3550 | |
| 3548 | 3551 | $el = $this->dom->createElement($token['name']); |
@@ -3588,10 +3591,11 @@ discard block |
||
| 3588 | 3591 | } |
| 3589 | 3592 | } |
| 3590 | 3593 | |
| 3591 | - if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) |
|
| 3592 | - $this->foster_parent->insertBefore($node, $table); |
|
| 3593 | - else |
|
| 3594 | - $this->foster_parent->appendChild($node); |
|
| 3594 | + if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) { |
|
| 3595 | + $this->foster_parent->insertBefore($node, $table); |
|
| 3596 | + } else { |
|
| 3597 | + $this->foster_parent->appendChild($node); |
|
| 3598 | + } |
|
| 3595 | 3599 | |
| 3596 | 3600 | $this->foster_parent = null; |
| 3597 | 3601 | } |
@@ -3754,17 +3758,15 @@ discard block |
||
| 3754 | 3758 | |
| 3755 | 3759 | private function getElementCategory($node) { |
| 3756 | 3760 | $name = $node->tagName; |
| 3757 | - if(in_array($name, $this->special)) |
|
| 3758 | - return self::SPECIAL; |
|
| 3759 | - |
|
| 3760 | - elseif(in_array($name, $this->scoping)) |
|
| 3761 | - return self::SCOPING; |
|
| 3762 | - |
|
| 3763 | - elseif(in_array($name, $this->formatting)) |
|
| 3764 | - return self::FORMATTING; |
|
| 3765 | - |
|
| 3766 | - else |
|
| 3767 | - return self::PHRASING; |
|
| 3761 | + if(in_array($name, $this->special)) { |
|
| 3762 | + return self::SPECIAL; |
|
| 3763 | + } elseif(in_array($name, $this->scoping)) { |
|
| 3764 | + return self::SCOPING; |
|
| 3765 | + } elseif(in_array($name, $this->formatting)) { |
|
| 3766 | + return self::FORMATTING; |
|
| 3767 | + } else { |
|
| 3768 | + return self::PHRASING; |
|
| 3769 | + } |
|
| 3768 | 3770 | } |
| 3769 | 3771 | |
| 3770 | 3772 | private function clearStackToTableContext($elements) { |
@@ -69,48 +69,48 @@ discard block |
||
| 69 | 69 | private $token; |
| 70 | 70 | private $content_model; |
| 71 | 71 | private $escape = false; |
| 72 | - private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', |
|
| 73 | - 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', |
|
| 74 | - 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', |
|
| 75 | - 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', |
|
| 76 | - 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', |
|
| 77 | - 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', |
|
| 78 | - 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', |
|
| 79 | - 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', |
|
| 80 | - 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', |
|
| 81 | - 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', |
|
| 82 | - 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', |
|
| 83 | - 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', |
|
| 84 | - 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', |
|
| 85 | - 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', |
|
| 86 | - 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', |
|
| 87 | - 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', |
|
| 88 | - 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', |
|
| 89 | - 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', |
|
| 90 | - 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', |
|
| 91 | - 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', |
|
| 92 | - 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', |
|
| 93 | - 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', |
|
| 94 | - 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', |
|
| 95 | - 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', |
|
| 96 | - 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', |
|
| 97 | - 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', |
|
| 98 | - 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', |
|
| 99 | - 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', |
|
| 100 | - 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', |
|
| 101 | - 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', |
|
| 102 | - 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', |
|
| 103 | - 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', |
|
| 104 | - 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', |
|
| 105 | - 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', |
|
| 106 | - 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', |
|
| 107 | - 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', |
|
| 108 | - 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', |
|
| 109 | - 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', |
|
| 110 | - 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', |
|
| 111 | - 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', |
|
| 112 | - 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', |
|
| 113 | - 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); |
|
| 72 | + private $entities = array('AElig;', 'AElig', 'AMP;', 'AMP', 'Aacute;', 'Aacute', |
|
| 73 | + 'Acirc;', 'Acirc', 'Agrave;', 'Agrave', 'Alpha;', 'Aring;', 'Aring', 'Atilde;', |
|
| 74 | + 'Atilde', 'Auml;', 'Auml', 'Beta;', 'COPY;', 'COPY', 'Ccedil;', 'Ccedil', 'Chi;', |
|
| 75 | + 'Dagger;', 'Delta;', 'ETH;', 'ETH', 'Eacute;', 'Eacute', 'Ecirc;', 'Ecirc', 'Egrave;', |
|
| 76 | + 'Egrave', 'Epsilon;', 'Eta;', 'Euml;', 'Euml', 'GT;', 'GT', 'Gamma;', 'Iacute;', |
|
| 77 | + 'Iacute', 'Icirc;', 'Icirc', 'Igrave;', 'Igrave', 'Iota;', 'Iuml;', 'Iuml', 'Kappa;', |
|
| 78 | + 'LT;', 'LT', 'Lambda;', 'Mu;', 'Ntilde;', 'Ntilde', 'Nu;', 'OElig;', 'Oacute;', |
|
| 79 | + 'Oacute', 'Ocirc;', 'Ocirc', 'Ograve;', 'Ograve', 'Omega;', 'Omicron;', 'Oslash;', |
|
| 80 | + 'Oslash', 'Otilde;', 'Otilde', 'Ouml;', 'Ouml', 'Phi;', 'Pi;', 'Prime;', 'Psi;', |
|
| 81 | + 'QUOT;', 'QUOT', 'REG;', 'REG', 'Rho;', 'Scaron;', 'Sigma;', 'THORN;', 'THORN', |
|
| 82 | + 'TRADE;', 'Tau;', 'Theta;', 'Uacute;', 'Uacute', 'Ucirc;', 'Ucirc', 'Ugrave;', |
|
| 83 | + 'Ugrave', 'Upsilon;', 'Uuml;', 'Uuml', 'Xi;', 'Yacute;', 'Yacute', 'Yuml;', 'Zeta;', |
|
| 84 | + 'aacute;', 'aacute', 'acirc;', 'acirc', 'acute;', 'acute', 'aelig;', 'aelig', |
|
| 85 | + 'agrave;', 'agrave', 'alefsym;', 'alpha;', 'amp;', 'amp', 'and;', 'ang;', 'apos;', |
|
| 86 | + 'aring;', 'aring', 'asymp;', 'atilde;', 'atilde', 'auml;', 'auml', 'bdquo;', 'beta;', |
|
| 87 | + 'brvbar;', 'brvbar', 'bull;', 'cap;', 'ccedil;', 'ccedil', 'cedil;', 'cedil', |
|
| 88 | + 'cent;', 'cent', 'chi;', 'circ;', 'clubs;', 'cong;', 'copy;', 'copy', 'crarr;', |
|
| 89 | + 'cup;', 'curren;', 'curren', 'dArr;', 'dagger;', 'darr;', 'deg;', 'deg', 'delta;', |
|
| 90 | + 'diams;', 'divide;', 'divide', 'eacute;', 'eacute', 'ecirc;', 'ecirc', 'egrave;', |
|
| 91 | + 'egrave', 'empty;', 'emsp;', 'ensp;', 'epsilon;', 'equiv;', 'eta;', 'eth;', 'eth', |
|
| 92 | + 'euml;', 'euml', 'euro;', 'exist;', 'fnof;', 'forall;', 'frac12;', 'frac12', |
|
| 93 | + 'frac14;', 'frac14', 'frac34;', 'frac34', 'frasl;', 'gamma;', 'ge;', 'gt;', 'gt', |
|
| 94 | + 'hArr;', 'harr;', 'hearts;', 'hellip;', 'iacute;', 'iacute', 'icirc;', 'icirc', |
|
| 95 | + 'iexcl;', 'iexcl', 'igrave;', 'igrave', 'image;', 'infin;', 'int;', 'iota;', |
|
| 96 | + 'iquest;', 'iquest', 'isin;', 'iuml;', 'iuml', 'kappa;', 'lArr;', 'lambda;', 'lang;', |
|
| 97 | + 'laquo;', 'laquo', 'larr;', 'lceil;', 'ldquo;', 'le;', 'lfloor;', 'lowast;', 'loz;', |
|
| 98 | + 'lrm;', 'lsaquo;', 'lsquo;', 'lt;', 'lt', 'macr;', 'macr', 'mdash;', 'micro;', 'micro', |
|
| 99 | + 'middot;', 'middot', 'minus;', 'mu;', 'nabla;', 'nbsp;', 'nbsp', 'ndash;', 'ne;', |
|
| 100 | + 'ni;', 'not;', 'not', 'notin;', 'nsub;', 'ntilde;', 'ntilde', 'nu;', 'oacute;', |
|
| 101 | + 'oacute', 'ocirc;', 'ocirc', 'oelig;', 'ograve;', 'ograve', 'oline;', 'omega;', |
|
| 102 | + 'omicron;', 'oplus;', 'or;', 'ordf;', 'ordf', 'ordm;', 'ordm', 'oslash;', 'oslash', |
|
| 103 | + 'otilde;', 'otilde', 'otimes;', 'ouml;', 'ouml', 'para;', 'para', 'part;', 'permil;', |
|
| 104 | + 'perp;', 'phi;', 'pi;', 'piv;', 'plusmn;', 'plusmn', 'pound;', 'pound', 'prime;', |
|
| 105 | + 'prod;', 'prop;', 'psi;', 'quot;', 'quot', 'rArr;', 'radic;', 'rang;', 'raquo;', |
|
| 106 | + 'raquo', 'rarr;', 'rceil;', 'rdquo;', 'real;', 'reg;', 'reg', 'rfloor;', 'rho;', |
|
| 107 | + 'rlm;', 'rsaquo;', 'rsquo;', 'sbquo;', 'scaron;', 'sdot;', 'sect;', 'sect', 'shy;', |
|
| 108 | + 'shy', 'sigma;', 'sigmaf;', 'sim;', 'spades;', 'sub;', 'sube;', 'sum;', 'sup1;', |
|
| 109 | + 'sup1', 'sup2;', 'sup2', 'sup3;', 'sup3', 'sup;', 'supe;', 'szlig;', 'szlig', 'tau;', |
|
| 110 | + 'there4;', 'theta;', 'thetasym;', 'thinsp;', 'thorn;', 'thorn', 'tilde;', 'times;', |
|
| 111 | + 'times', 'trade;', 'uArr;', 'uacute;', 'uacute', 'uarr;', 'ucirc;', 'ucirc', |
|
| 112 | + 'ugrave;', 'ugrave', 'uml;', 'uml', 'upsih;', 'upsilon;', 'uuml;', 'uuml', 'weierp;', |
|
| 113 | + 'xi;', 'yacute;', 'yacute', 'yen;', 'yen', 'yuml;', 'yuml', 'zeta;', 'zwj;', 'zwnj;'); |
|
| 114 | 114 | |
| 115 | 115 | const PCDATA = 0; |
| 116 | 116 | const RCDATA = 1; |
@@ -134,7 +134,7 @@ discard block |
||
| 134 | 134 | |
| 135 | 135 | $this->state = 'data'; |
| 136 | 136 | |
| 137 | - while($this->state !== null) { |
|
| 137 | + while ($this->state !== null) { |
|
| 138 | 138 | $this->{$this->state.'State'}(); |
| 139 | 139 | } |
| 140 | 140 | } |
@@ -150,8 +150,8 @@ discard block |
||
| 150 | 150 | } |
| 151 | 151 | |
| 152 | 152 | private function character($s, $l = 0) { |
| 153 | - if($s + $l < $this->EOF) { |
|
| 154 | - if($l === 0) { |
|
| 153 | + if ($s + $l < $this->EOF) { |
|
| 154 | + if ($l === 0) { |
|
| 155 | 155 | return $this->data[$s]; |
| 156 | 156 | } else { |
| 157 | 157 | return substr($this->data, $s, $l); |
@@ -168,21 +168,21 @@ discard block |
||
| 168 | 168 | $this->char++; |
| 169 | 169 | $char = $this->char(); |
| 170 | 170 | |
| 171 | - if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { |
|
| 171 | + if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { |
|
| 172 | 172 | /* U+0026 AMPERSAND (&) |
| 173 | 173 | When the content model flag is set to one of the PCDATA or RCDATA |
| 174 | 174 | states: switch to the entity data state. Otherwise: treat it as per |
| 175 | 175 | the "anything else" entry below. */ |
| 176 | 176 | $this->state = 'entityData'; |
| 177 | 177 | |
| 178 | - } elseif($char === '-') { |
|
| 178 | + } elseif ($char === '-') { |
|
| 179 | 179 | /* If the content model flag is set to either the RCDATA state or |
| 180 | 180 | the CDATA state, and the escape flag is false, and there are at |
| 181 | 181 | least three characters before this one in the input stream, and the |
| 182 | 182 | last four characters in the input stream, including this one, are |
| 183 | 183 | U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, |
| 184 | 184 | and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */ |
| 185 | - if(($this->content_model === self::RCDATA || $this->content_model === |
|
| 185 | + if (($this->content_model === self::RCDATA || $this->content_model === |
|
| 186 | 186 | self::CDATA) && $this->escape === false && |
| 187 | 187 | $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { |
| 188 | 188 | $this->escape = true; |
@@ -196,7 +196,7 @@ discard block |
||
| 196 | 196 | )); |
| 197 | 197 | |
| 198 | 198 | /* U+003C LESS-THAN SIGN (<) */ |
| 199 | - } elseif($char === '<' && ($this->content_model === self::PCDATA || |
|
| 199 | + } elseif ($char === '<' && ($this->content_model === self::PCDATA || |
|
| 200 | 200 | (($this->content_model === self::RCDATA || |
| 201 | 201 | $this->content_model === self::CDATA) && $this->escape === false))) { |
| 202 | 202 | /* When the content model flag is set to the PCDATA state: switch |
@@ -210,13 +210,13 @@ discard block |
||
| 210 | 210 | $this->state = 'tagOpen'; |
| 211 | 211 | |
| 212 | 212 | /* U+003E GREATER-THAN SIGN (>) */ |
| 213 | - } elseif($char === '>') { |
|
| 213 | + } elseif ($char === '>') { |
|
| 214 | 214 | /* If the content model flag is set to either the RCDATA state or |
| 215 | 215 | the CDATA state, and the escape flag is true, and the last three |
| 216 | 216 | characters in the input stream including this one are U+002D |
| 217 | 217 | HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), |
| 218 | 218 | set the escape flag to false. */ |
| 219 | - if(($this->content_model === self::RCDATA || |
|
| 219 | + if (($this->content_model === self::RCDATA || |
|
| 220 | 220 | $this->content_model === self::CDATA) && $this->escape === true && |
| 221 | 221 | $this->character($this->char, 3) === '-->') { |
| 222 | 222 | $this->escape = false; |
@@ -229,12 +229,12 @@ discard block |
||
| 229 | 229 | 'data' => $char |
| 230 | 230 | )); |
| 231 | 231 | |
| 232 | - } elseif($this->char === $this->EOF) { |
|
| 232 | + } elseif ($this->char === $this->EOF) { |
|
| 233 | 233 | /* EOF |
| 234 | 234 | Emit an end-of-file token. */ |
| 235 | 235 | $this->EOF(); |
| 236 | 236 | |
| 237 | - } elseif($this->content_model === self::PLAINTEXT) { |
|
| 237 | + } elseif ($this->content_model === self::PLAINTEXT) { |
|
| 238 | 238 | /* When the content model flag is set to the PLAINTEXT state |
| 239 | 239 | THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of |
| 240 | 240 | the text and emit it as a character token. */ |
@@ -280,7 +280,7 @@ discard block |
||
| 280 | 280 | } |
| 281 | 281 | |
| 282 | 282 | private function tagOpenState() { |
| 283 | - switch($this->content_model) { |
|
| 283 | + switch ($this->content_model) { |
|
| 284 | 284 | case self::RCDATA: |
| 285 | 285 | case self::CDATA: |
| 286 | 286 | /* If the next input character is a U+002F SOLIDUS (/) character, |
@@ -288,7 +288,7 @@ discard block |
||
| 288 | 288 | input character is not a U+002F SOLIDUS (/) character, emit a |
| 289 | 289 | U+003C LESS-THAN SIGN character token and switch to the data |
| 290 | 290 | state to process the next input character. */ |
| 291 | - if($this->character($this->char + 1) === '/') { |
|
| 291 | + if ($this->character($this->char + 1) === '/') { |
|
| 292 | 292 | $this->char++; |
| 293 | 293 | $this->state = 'closeTagOpen'; |
| 294 | 294 | |
@@ -308,17 +308,17 @@ discard block |
||
| 308 | 308 | $this->char++; |
| 309 | 309 | $char = $this->char(); |
| 310 | 310 | |
| 311 | - if($char === '!') { |
|
| 311 | + if ($char === '!') { |
|
| 312 | 312 | /* U+0021 EXCLAMATION MARK (!) |
| 313 | 313 | Switch to the markup declaration open state. */ |
| 314 | 314 | $this->state = 'markupDeclarationOpen'; |
| 315 | 315 | |
| 316 | - } elseif($char === '/') { |
|
| 316 | + } elseif ($char === '/') { |
|
| 317 | 317 | /* U+002F SOLIDUS (/) |
| 318 | 318 | Switch to the close tag open state. */ |
| 319 | 319 | $this->state = 'closeTagOpen'; |
| 320 | 320 | |
| 321 | - } elseif(preg_match('/^[A-Za-z]$/', $char)) { |
|
| 321 | + } elseif (preg_match('/^[A-Za-z]$/', $char)) { |
|
| 322 | 322 | /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z |
| 323 | 323 | Create a new start tag token, set its tag name to the lowercase |
| 324 | 324 | version of the input character (add 0x0020 to the character's code |
@@ -332,7 +332,7 @@ discard block |
||
| 332 | 332 | |
| 333 | 333 | $this->state = 'tagName'; |
| 334 | 334 | |
| 335 | - } elseif($char === '>') { |
|
| 335 | + } elseif ($char === '>') { |
|
| 336 | 336 | /* U+003E GREATER-THAN SIGN (>) |
| 337 | 337 | Parse error. Emit a U+003C LESS-THAN SIGN character token and a |
| 338 | 338 | U+003E GREATER-THAN SIGN character token. Switch to the data state. */ |
@@ -343,7 +343,7 @@ discard block |
||
| 343 | 343 | |
| 344 | 344 | $this->state = 'data'; |
| 345 | 345 | |
| 346 | - } elseif($char === '?') { |
|
| 346 | + } elseif ($char === '?') { |
|
| 347 | 347 | /* U+003F QUESTION MARK (?) |
| 348 | 348 | Parse error. Switch to the bogus comment state. */ |
| 349 | 349 | $this->state = 'bogusComment'; |
@@ -368,7 +368,7 @@ discard block |
||
| 368 | 368 | $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); |
| 369 | 369 | $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; |
| 370 | 370 | |
| 371 | - if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && |
|
| 371 | + if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && |
|
| 372 | 372 | (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', |
| 373 | 373 | $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { |
| 374 | 374 | /* If the content model flag is set to the RCDATA or CDATA states then |
@@ -400,7 +400,7 @@ discard block |
||
| 400 | 400 | $this->char++; |
| 401 | 401 | $char = $this->char(); |
| 402 | 402 | |
| 403 | - if(preg_match('/^[A-Za-z]$/', $char)) { |
|
| 403 | + if (preg_match('/^[A-Za-z]$/', $char)) { |
|
| 404 | 404 | /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z |
| 405 | 405 | Create a new end tag token, set its tag name to the lowercase version |
| 406 | 406 | of the input character (add 0x0020 to the character's code point), then |
@@ -413,12 +413,12 @@ discard block |
||
| 413 | 413 | |
| 414 | 414 | $this->state = 'tagName'; |
| 415 | 415 | |
| 416 | - } elseif($char === '>') { |
|
| 416 | + } elseif ($char === '>') { |
|
| 417 | 417 | /* U+003E GREATER-THAN SIGN (>) |
| 418 | 418 | Parse error. Switch to the data state. */ |
| 419 | 419 | $this->state = 'data'; |
| 420 | 420 | |
| 421 | - } elseif($this->char === $this->EOF) { |
|
| 421 | + } elseif ($this->char === $this->EOF) { |
|
| 422 | 422 | /* EOF |
| 423 | 423 | Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F |
| 424 | 424 | SOLIDUS character token. Reconsume the EOF character in the data state. */ |
@@ -442,7 +442,7 @@ discard block |
||
| 442 | 442 | $this->char++; |
| 443 | 443 | $char = $this->character($this->char); |
| 444 | 444 | |
| 445 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 445 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 446 | 446 | /* U+0009 CHARACTER TABULATION |
| 447 | 447 | U+000A LINE FEED (LF) |
| 448 | 448 | U+000B LINE TABULATION |
@@ -451,13 +451,13 @@ discard block |
||
| 451 | 451 | Switch to the before attribute name state. */ |
| 452 | 452 | $this->state = 'beforeAttributeName'; |
| 453 | 453 | |
| 454 | - } elseif($char === '>') { |
|
| 454 | + } elseif ($char === '>') { |
|
| 455 | 455 | /* U+003E GREATER-THAN SIGN (>) |
| 456 | 456 | Emit the current tag token. Switch to the data state. */ |
| 457 | 457 | $this->emitToken($this->token); |
| 458 | 458 | $this->state = 'data'; |
| 459 | 459 | |
| 460 | - } elseif($this->char === $this->EOF) { |
|
| 460 | + } elseif ($this->char === $this->EOF) { |
|
| 461 | 461 | /* EOF |
| 462 | 462 | Parse error. Emit the current tag token. Reconsume the EOF |
| 463 | 463 | character in the data state. */ |
@@ -466,7 +466,7 @@ discard block |
||
| 466 | 466 | $this->char--; |
| 467 | 467 | $this->state = 'data'; |
| 468 | 468 | |
| 469 | - } elseif($char === '/') { |
|
| 469 | + } elseif ($char === '/') { |
|
| 470 | 470 | /* U+002F SOLIDUS (/) |
| 471 | 471 | Parse error unless this is a permitted slash. Switch to the before |
| 472 | 472 | attribute name state. */ |
@@ -486,7 +486,7 @@ discard block |
||
| 486 | 486 | $this->char++; |
| 487 | 487 | $char = $this->character($this->char); |
| 488 | 488 | |
| 489 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 489 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 490 | 490 | /* U+0009 CHARACTER TABULATION |
| 491 | 491 | U+000A LINE FEED (LF) |
| 492 | 492 | U+000B LINE TABULATION |
@@ -495,19 +495,19 @@ discard block |
||
| 495 | 495 | Stay in the before attribute name state. */ |
| 496 | 496 | $this->state = 'beforeAttributeName'; |
| 497 | 497 | |
| 498 | - } elseif($char === '>') { |
|
| 498 | + } elseif ($char === '>') { |
|
| 499 | 499 | /* U+003E GREATER-THAN SIGN (>) |
| 500 | 500 | Emit the current tag token. Switch to the data state. */ |
| 501 | 501 | $this->emitToken($this->token); |
| 502 | 502 | $this->state = 'data'; |
| 503 | 503 | |
| 504 | - } elseif($char === '/') { |
|
| 504 | + } elseif ($char === '/') { |
|
| 505 | 505 | /* U+002F SOLIDUS (/) |
| 506 | 506 | Parse error unless this is a permitted slash. Stay in the before |
| 507 | 507 | attribute name state. */ |
| 508 | 508 | $this->state = 'beforeAttributeName'; |
| 509 | 509 | |
| 510 | - } elseif($this->char === $this->EOF) { |
|
| 510 | + } elseif ($this->char === $this->EOF) { |
|
| 511 | 511 | /* EOF |
| 512 | 512 | Parse error. Emit the current tag token. Reconsume the EOF |
| 513 | 513 | character in the data state. */ |
@@ -535,7 +535,7 @@ discard block |
||
| 535 | 535 | $this->char++; |
| 536 | 536 | $char = $this->character($this->char); |
| 537 | 537 | |
| 538 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 538 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 539 | 539 | /* U+0009 CHARACTER TABULATION |
| 540 | 540 | U+000A LINE FEED (LF) |
| 541 | 541 | U+000B LINE TABULATION |
@@ -544,24 +544,24 @@ discard block |
||
| 544 | 544 | Switch to the after attribute name state. */ |
| 545 | 545 | $this->state = 'afterAttributeName'; |
| 546 | 546 | |
| 547 | - } elseif($char === '=') { |
|
| 547 | + } elseif ($char === '=') { |
|
| 548 | 548 | /* U+003D EQUALS SIGN (=) |
| 549 | 549 | Switch to the before attribute value state. */ |
| 550 | 550 | $this->state = 'beforeAttributeValue'; |
| 551 | 551 | |
| 552 | - } elseif($char === '>') { |
|
| 552 | + } elseif ($char === '>') { |
|
| 553 | 553 | /* U+003E GREATER-THAN SIGN (>) |
| 554 | 554 | Emit the current tag token. Switch to the data state. */ |
| 555 | 555 | $this->emitToken($this->token); |
| 556 | 556 | $this->state = 'data'; |
| 557 | 557 | |
| 558 | - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 558 | + } elseif ($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 559 | 559 | /* U+002F SOLIDUS (/) |
| 560 | 560 | Parse error unless this is a permitted slash. Switch to the before |
| 561 | 561 | attribute name state. */ |
| 562 | 562 | $this->state = 'beforeAttributeName'; |
| 563 | 563 | |
| 564 | - } elseif($this->char === $this->EOF) { |
|
| 564 | + } elseif ($this->char === $this->EOF) { |
|
| 565 | 565 | /* EOF |
| 566 | 566 | Parse error. Emit the current tag token. Reconsume the EOF |
| 567 | 567 | character in the data state. */ |
@@ -586,7 +586,7 @@ discard block |
||
| 586 | 586 | $this->char++; |
| 587 | 587 | $char = $this->character($this->char); |
| 588 | 588 | |
| 589 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 589 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 590 | 590 | /* U+0009 CHARACTER TABULATION |
| 591 | 591 | U+000A LINE FEED (LF) |
| 592 | 592 | U+000B LINE TABULATION |
@@ -595,24 +595,24 @@ discard block |
||
| 595 | 595 | Stay in the after attribute name state. */ |
| 596 | 596 | $this->state = 'afterAttributeName'; |
| 597 | 597 | |
| 598 | - } elseif($char === '=') { |
|
| 598 | + } elseif ($char === '=') { |
|
| 599 | 599 | /* U+003D EQUALS SIGN (=) |
| 600 | 600 | Switch to the before attribute value state. */ |
| 601 | 601 | $this->state = 'beforeAttributeValue'; |
| 602 | 602 | |
| 603 | - } elseif($char === '>') { |
|
| 603 | + } elseif ($char === '>') { |
|
| 604 | 604 | /* U+003E GREATER-THAN SIGN (>) |
| 605 | 605 | Emit the current tag token. Switch to the data state. */ |
| 606 | 606 | $this->emitToken($this->token); |
| 607 | 607 | $this->state = 'data'; |
| 608 | 608 | |
| 609 | - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 609 | + } elseif ($char === '/' && $this->character($this->char + 1) !== '>') { |
|
| 610 | 610 | /* U+002F SOLIDUS (/) |
| 611 | 611 | Parse error unless this is a permitted slash. Switch to the |
| 612 | 612 | before attribute name state. */ |
| 613 | 613 | $this->state = 'beforeAttributeName'; |
| 614 | 614 | |
| 615 | - } elseif($this->char === $this->EOF) { |
|
| 615 | + } elseif ($this->char === $this->EOF) { |
|
| 616 | 616 | /* EOF |
| 617 | 617 | Parse error. Emit the current tag token. Reconsume the EOF |
| 618 | 618 | character in the data state. */ |
@@ -640,7 +640,7 @@ discard block |
||
| 640 | 640 | $this->char++; |
| 641 | 641 | $char = $this->character($this->char); |
| 642 | 642 | |
| 643 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 643 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 644 | 644 | /* U+0009 CHARACTER TABULATION |
| 645 | 645 | U+000A LINE FEED (LF) |
| 646 | 646 | U+000B LINE TABULATION |
@@ -649,24 +649,24 @@ discard block |
||
| 649 | 649 | Stay in the before attribute value state. */ |
| 650 | 650 | $this->state = 'beforeAttributeValue'; |
| 651 | 651 | |
| 652 | - } elseif($char === '"') { |
|
| 652 | + } elseif ($char === '"') { |
|
| 653 | 653 | /* U+0022 QUOTATION MARK (") |
| 654 | 654 | Switch to the attribute value (double-quoted) state. */ |
| 655 | 655 | $this->state = 'attributeValueDoubleQuoted'; |
| 656 | 656 | |
| 657 | - } elseif($char === '&') { |
|
| 657 | + } elseif ($char === '&') { |
|
| 658 | 658 | /* U+0026 AMPERSAND (&) |
| 659 | 659 | Switch to the attribute value (unquoted) state and reconsume |
| 660 | 660 | this input character. */ |
| 661 | 661 | $this->char--; |
| 662 | 662 | $this->state = 'attributeValueUnquoted'; |
| 663 | 663 | |
| 664 | - } elseif($char === '\'') { |
|
| 664 | + } elseif ($char === '\'') { |
|
| 665 | 665 | /* U+0027 APOSTROPHE (') |
| 666 | 666 | Switch to the attribute value (single-quoted) state. */ |
| 667 | 667 | $this->state = 'attributeValueSingleQuoted'; |
| 668 | 668 | |
| 669 | - } elseif($char === '>') { |
|
| 669 | + } elseif ($char === '>') { |
|
| 670 | 670 | /* U+003E GREATER-THAN SIGN (>) |
| 671 | 671 | Emit the current tag token. Switch to the data state. */ |
| 672 | 672 | $this->emitToken($this->token); |
@@ -688,17 +688,17 @@ discard block |
||
| 688 | 688 | $this->char++; |
| 689 | 689 | $char = $this->character($this->char); |
| 690 | 690 | |
| 691 | - if($char === '"') { |
|
| 691 | + if ($char === '"') { |
|
| 692 | 692 | /* U+0022 QUOTATION MARK (") |
| 693 | 693 | Switch to the before attribute name state. */ |
| 694 | 694 | $this->state = 'beforeAttributeName'; |
| 695 | 695 | |
| 696 | - } elseif($char === '&') { |
|
| 696 | + } elseif ($char === '&') { |
|
| 697 | 697 | /* U+0026 AMPERSAND (&) |
| 698 | 698 | Switch to the entity in attribute value state. */ |
| 699 | 699 | $this->entityInAttributeValueState('double'); |
| 700 | 700 | |
| 701 | - } elseif($this->char === $this->EOF) { |
|
| 701 | + } elseif ($this->char === $this->EOF) { |
|
| 702 | 702 | /* EOF |
| 703 | 703 | Parse error. Emit the current tag token. Reconsume the character |
| 704 | 704 | in the data state. */ |
@@ -723,17 +723,17 @@ discard block |
||
| 723 | 723 | $this->char++; |
| 724 | 724 | $char = $this->character($this->char); |
| 725 | 725 | |
| 726 | - if($char === '\'') { |
|
| 726 | + if ($char === '\'') { |
|
| 727 | 727 | /* U+0027 APOSTROPHE (') |
| 728 | 728 | Switch to the before attribute name state. */ |
| 729 | 729 | $this->state = 'beforeAttributeName'; |
| 730 | 730 | |
| 731 | - } elseif($char === '&') { |
|
| 731 | + } elseif ($char === '&') { |
|
| 732 | 732 | /* U+0026 AMPERSAND (&) |
| 733 | 733 | Switch to the entity in attribute value state. */ |
| 734 | 734 | $this->entityInAttributeValueState('single'); |
| 735 | 735 | |
| 736 | - } elseif($this->char === $this->EOF) { |
|
| 736 | + } elseif ($this->char === $this->EOF) { |
|
| 737 | 737 | /* EOF |
| 738 | 738 | Parse error. Emit the current tag token. Reconsume the character |
| 739 | 739 | in the data state. */ |
@@ -758,7 +758,7 @@ discard block |
||
| 758 | 758 | $this->char++; |
| 759 | 759 | $char = $this->character($this->char); |
| 760 | 760 | |
| 761 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 761 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 762 | 762 | /* U+0009 CHARACTER TABULATION |
| 763 | 763 | U+000A LINE FEED (LF) |
| 764 | 764 | U+000B LINE TABULATION |
@@ -767,12 +767,12 @@ discard block |
||
| 767 | 767 | Switch to the before attribute name state. */ |
| 768 | 768 | $this->state = 'beforeAttributeName'; |
| 769 | 769 | |
| 770 | - } elseif($char === '&') { |
|
| 770 | + } elseif ($char === '&') { |
|
| 771 | 771 | /* U+0026 AMPERSAND (&) |
| 772 | 772 | Switch to the entity in attribute value state. */ |
| 773 | 773 | $this->entityInAttributeValueState(); |
| 774 | 774 | |
| 775 | - } elseif($char === '>') { |
|
| 775 | + } elseif ($char === '>') { |
|
| 776 | 776 | /* U+003E GREATER-THAN SIGN (>) |
| 777 | 777 | Emit the current tag token. Switch to the data state. */ |
| 778 | 778 | $this->emitToken($this->token); |
@@ -825,7 +825,7 @@ discard block |
||
| 825 | 825 | $this->state = 'data'; |
| 826 | 826 | |
| 827 | 827 | /* If the end of the file was reached, reconsume the EOF character. */ |
| 828 | - if($this->char === $this->EOF) { |
|
| 828 | + if ($this->char === $this->EOF) { |
|
| 829 | 829 | $this->char = $this->EOF - 1; |
| 830 | 830 | } |
| 831 | 831 | } |
@@ -834,7 +834,7 @@ discard block |
||
| 834 | 834 | /* If the next two characters are both U+002D HYPHEN-MINUS (-) |
| 835 | 835 | characters, consume those two characters, create a comment token whose |
| 836 | 836 | data is the empty string, and switch to the comment state. */ |
| 837 | - if($this->character($this->char + 1, 2) === '--') { |
|
| 837 | + if ($this->character($this->char + 1, 2) === '--') { |
|
| 838 | 838 | $this->char += 2; |
| 839 | 839 | $this->state = 'comment'; |
| 840 | 840 | $this->token = array( |
@@ -845,7 +845,7 @@ discard block |
||
| 845 | 845 | /* Otherwise if the next seven characters are a case-insensitive match |
| 846 | 846 | for the word "DOCTYPE", then consume those characters and switch to the |
| 847 | 847 | DOCTYPE state. */ |
| 848 | - } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { |
|
| 848 | + } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') { |
|
| 849 | 849 | $this->char += 7; |
| 850 | 850 | $this->state = 'doctype'; |
| 851 | 851 | |
@@ -864,12 +864,12 @@ discard block |
||
| 864 | 864 | $char = $this->char(); |
| 865 | 865 | |
| 866 | 866 | /* U+002D HYPHEN-MINUS (-) */ |
| 867 | - if($char === '-') { |
|
| 867 | + if ($char === '-') { |
|
| 868 | 868 | /* Switch to the comment dash state */ |
| 869 | 869 | $this->state = 'commentDash'; |
| 870 | 870 | |
| 871 | 871 | /* EOF */ |
| 872 | - } elseif($this->char === $this->EOF) { |
|
| 872 | + } elseif ($this->char === $this->EOF) { |
|
| 873 | 873 | /* Parse error. Emit the comment token. Reconsume the EOF character |
| 874 | 874 | in the data state. */ |
| 875 | 875 | $this->emitToken($this->token); |
@@ -890,12 +890,12 @@ discard block |
||
| 890 | 890 | $char = $this->char(); |
| 891 | 891 | |
| 892 | 892 | /* U+002D HYPHEN-MINUS (-) */ |
| 893 | - if($char === '-') { |
|
| 893 | + if ($char === '-') { |
|
| 894 | 894 | /* Switch to the comment end state */ |
| 895 | 895 | $this->state = 'commentEnd'; |
| 896 | 896 | |
| 897 | 897 | /* EOF */ |
| 898 | - } elseif($this->char === $this->EOF) { |
|
| 898 | + } elseif ($this->char === $this->EOF) { |
|
| 899 | 899 | /* Parse error. Emit the comment token. Reconsume the EOF character |
| 900 | 900 | in the data state. */ |
| 901 | 901 | $this->emitToken($this->token); |
@@ -916,14 +916,14 @@ discard block |
||
| 916 | 916 | $this->char++; |
| 917 | 917 | $char = $this->char(); |
| 918 | 918 | |
| 919 | - if($char === '>') { |
|
| 919 | + if ($char === '>') { |
|
| 920 | 920 | $this->emitToken($this->token); |
| 921 | 921 | $this->state = 'data'; |
| 922 | 922 | |
| 923 | - } elseif($char === '-') { |
|
| 923 | + } elseif ($char === '-') { |
|
| 924 | 924 | $this->token['data'] .= '-'; |
| 925 | 925 | |
| 926 | - } elseif($this->char === $this->EOF) { |
|
| 926 | + } elseif ($this->char === $this->EOF) { |
|
| 927 | 927 | $this->emitToken($this->token); |
| 928 | 928 | $this->char--; |
| 929 | 929 | $this->state = 'data'; |
@@ -939,7 +939,7 @@ discard block |
||
| 939 | 939 | $this->char++; |
| 940 | 940 | $char = $this->char(); |
| 941 | 941 | |
| 942 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 942 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 943 | 943 | $this->state = 'beforeDoctypeName'; |
| 944 | 944 | |
| 945 | 945 | } else { |
@@ -953,10 +953,10 @@ discard block |
||
| 953 | 953 | $this->char++; |
| 954 | 954 | $char = $this->char(); |
| 955 | 955 | |
| 956 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 956 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 957 | 957 | // Stay in the before DOCTYPE name state. |
| 958 | 958 | |
| 959 | - } elseif(preg_match('/^[a-z]$/', $char)) { |
|
| 959 | + } elseif (preg_match('/^[a-z]$/', $char)) { |
|
| 960 | 960 | $this->token = array( |
| 961 | 961 | 'name' => strtoupper($char), |
| 962 | 962 | 'type' => self::DOCTYPE, |
@@ -965,7 +965,7 @@ discard block |
||
| 965 | 965 | |
| 966 | 966 | $this->state = 'doctypeName'; |
| 967 | 967 | |
| 968 | - } elseif($char === '>') { |
|
| 968 | + } elseif ($char === '>') { |
|
| 969 | 969 | $this->emitToken(array( |
| 970 | 970 | 'name' => null, |
| 971 | 971 | 'type' => self::DOCTYPE, |
@@ -974,7 +974,7 @@ discard block |
||
| 974 | 974 | |
| 975 | 975 | $this->state = 'data'; |
| 976 | 976 | |
| 977 | - } elseif($this->char === $this->EOF) { |
|
| 977 | + } elseif ($this->char === $this->EOF) { |
|
| 978 | 978 | $this->emitToken(array( |
| 979 | 979 | 'name' => null, |
| 980 | 980 | 'type' => self::DOCTYPE, |
@@ -1000,17 +1000,17 @@ discard block |
||
| 1000 | 1000 | $this->char++; |
| 1001 | 1001 | $char = $this->char(); |
| 1002 | 1002 | |
| 1003 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1003 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1004 | 1004 | $this->state = 'AfterDoctypeName'; |
| 1005 | 1005 | |
| 1006 | - } elseif($char === '>') { |
|
| 1006 | + } elseif ($char === '>') { |
|
| 1007 | 1007 | $this->emitToken($this->token); |
| 1008 | 1008 | $this->state = 'data'; |
| 1009 | 1009 | |
| 1010 | - } elseif(preg_match('/^[a-z]$/', $char)) { |
|
| 1010 | + } elseif (preg_match('/^[a-z]$/', $char)) { |
|
| 1011 | 1011 | $this->token['name'] .= strtoupper($char); |
| 1012 | 1012 | |
| 1013 | - } elseif($this->char === $this->EOF) { |
|
| 1013 | + } elseif ($this->char === $this->EOF) { |
|
| 1014 | 1014 | $this->emitToken($this->token); |
| 1015 | 1015 | $this->char--; |
| 1016 | 1016 | $this->state = 'data'; |
@@ -1029,14 +1029,14 @@ discard block |
||
| 1029 | 1029 | $this->char++; |
| 1030 | 1030 | $char = $this->char(); |
| 1031 | 1031 | |
| 1032 | - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1032 | + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { |
|
| 1033 | 1033 | // Stay in the DOCTYPE name state. |
| 1034 | 1034 | |
| 1035 | - } elseif($char === '>') { |
|
| 1035 | + } elseif ($char === '>') { |
|
| 1036 | 1036 | $this->emitToken($this->token); |
| 1037 | 1037 | $this->state = 'data'; |
| 1038 | 1038 | |
| 1039 | - } elseif($this->char === $this->EOF) { |
|
| 1039 | + } elseif ($this->char === $this->EOF) { |
|
| 1040 | 1040 | $this->emitToken($this->token); |
| 1041 | 1041 | $this->char--; |
| 1042 | 1042 | $this->state = 'data'; |
@@ -1052,11 +1052,11 @@ discard block |
||
| 1052 | 1052 | $this->char++; |
| 1053 | 1053 | $char = $this->char(); |
| 1054 | 1054 | |
| 1055 | - if($char === '>') { |
|
| 1055 | + if ($char === '>') { |
|
| 1056 | 1056 | $this->emitToken($this->token); |
| 1057 | 1057 | $this->state = 'data'; |
| 1058 | 1058 | |
| 1059 | - } elseif($this->char === $this->EOF) { |
|
| 1059 | + } elseif ($this->char === $this->EOF) { |
|
| 1060 | 1060 | $this->emitToken($this->token); |
| 1061 | 1061 | $this->char--; |
| 1062 | 1062 | $this->state = 'data'; |
@@ -1075,13 +1075,13 @@ discard block |
||
| 1075 | 1075 | // The behaviour depends on the identity of the next character (the |
| 1076 | 1076 | // one immediately after the U+0026 AMPERSAND character): |
| 1077 | 1077 | |
| 1078 | - switch($this->character($this->char + 1)) { |
|
| 1078 | + switch ($this->character($this->char + 1)) { |
|
| 1079 | 1079 | // U+0023 NUMBER SIGN (#) |
| 1080 | 1080 | case '#': |
| 1081 | 1081 | |
| 1082 | 1082 | // The behaviour further depends on the character after the |
| 1083 | 1083 | // U+0023 NUMBER SIGN: |
| 1084 | - switch($this->character($this->char + 1)) { |
|
| 1084 | + switch ($this->character($this->char + 1)) { |
|
| 1085 | 1085 | // U+0078 LATIN SMALL LETTER X |
| 1086 | 1086 | // U+0058 LATIN CAPITAL LETTER X |
| 1087 | 1087 | case 'x': |
@@ -1124,12 +1124,12 @@ discard block |
||
| 1124 | 1124 | $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); |
| 1125 | 1125 | $len = strlen($e_name); |
| 1126 | 1126 | |
| 1127 | - for($c = 1; $c <= $len; $c++) { |
|
| 1127 | + for ($c = 1; $c <= $len; $c++) { |
|
| 1128 | 1128 | $id = substr($e_name, 0, $c); |
| 1129 | 1129 | $this->char++; |
| 1130 | 1130 | |
| 1131 | - if(in_array($id, $this->entities)) { |
|
| 1132 | - if ($e_name[$c-1] !== ';') { |
|
| 1131 | + if (in_array($id, $this->entities)) { |
|
| 1132 | + if ($e_name[$c - 1] !== ';') { |
|
| 1133 | 1133 | if ($c < $len && $e_name[$c] == ';') { |
| 1134 | 1134 | $this->char++; // consume extra semicolon |
| 1135 | 1135 | } |
@@ -1144,7 +1144,7 @@ discard block |
||
| 1144 | 1144 | break; |
| 1145 | 1145 | } |
| 1146 | 1146 | |
| 1147 | - if(!$cond) { |
|
| 1147 | + if (!$cond) { |
|
| 1148 | 1148 | // If no match can be made, then this is a parse error. No |
| 1149 | 1149 | // characters are consumed, and nothing is returned. |
| 1150 | 1150 | $this->char = $start; |
@@ -1159,10 +1159,10 @@ discard block |
||
| 1159 | 1159 | private function emitToken($token) { |
| 1160 | 1160 | $emit = $this->tree->emitToken($token); |
| 1161 | 1161 | |
| 1162 | - if(is_int($emit)) { |
|
| 1162 | + if (is_int($emit)) { |
|
| 1163 | 1163 | $this->content_model = $emit; |
| 1164 | 1164 | |
| 1165 | - } elseif($token['type'] === self::ENDTAG) { |
|
| 1165 | + } elseif ($token['type'] === self::ENDTAG) { |
|
| 1166 | 1166 | $this->content_model = self::PCDATA; |
| 1167 | 1167 | } |
| 1168 | 1168 | } |
@@ -1187,15 +1187,15 @@ discard block |
||
| 1187 | 1187 | private $head_pointer = null; |
| 1188 | 1188 | private $form_pointer = null; |
| 1189 | 1189 | |
| 1190 | - private $scoping = array('button','caption','html','marquee','object','table','td','th'); |
|
| 1191 | - private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); |
|
| 1192 | - private $special = array('address','area','base','basefont','bgsound', |
|
| 1193 | - 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', |
|
| 1194 | - 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', |
|
| 1195 | - 'h6','head','hr','iframe','image','img','input','isindex','li','link', |
|
| 1196 | - 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', |
|
| 1197 | - 'option','p','param','plaintext','pre','script','select','spacer','style', |
|
| 1198 | - 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); |
|
| 1190 | + private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th'); |
|
| 1191 | + private $formatting = array('a', 'b', 'big', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u'); |
|
| 1192 | + private $special = array('address', 'area', 'base', 'basefont', 'bgsound', |
|
| 1193 | + 'blockquote', 'body', 'br', 'center', 'col', 'colgroup', 'dd', 'dir', 'div', 'dl', |
|
| 1194 | + 'dt', 'embed', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', |
|
| 1195 | + 'h6', 'head', 'hr', 'iframe', 'image', 'img', 'input', 'isindex', 'li', 'link', |
|
| 1196 | + 'listing', 'menu', 'meta', 'noembed', 'noframes', 'noscript', 'ol', 'optgroup', |
|
| 1197 | + 'option', 'p', 'param', 'plaintext', 'pre', 'script', 'select', 'spacer', 'style', |
|
| 1198 | + 'tbody', 'textarea', 'tfoot', 'thead', 'title', 'tr', 'ul', 'wbr'); |
|
| 1199 | 1199 | |
| 1200 | 1200 | // The different phases. |
| 1201 | 1201 | const INIT_PHASE = 0; |
@@ -1240,7 +1240,7 @@ discard block |
||
| 1240 | 1240 | |
| 1241 | 1241 | // Process tag tokens |
| 1242 | 1242 | public function emitToken($token) { |
| 1243 | - switch($this->phase) { |
|
| 1243 | + switch ($this->phase) { |
|
| 1244 | 1244 | case self::INIT_PHASE: return $this->initPhase($token); break; |
| 1245 | 1245 | case self::ROOT_PHASE: return $this->rootElementPhase($token); break; |
| 1246 | 1246 | case self::MAIN_PHASE: return $this->mainPhase($token); break; |
@@ -1260,7 +1260,7 @@ discard block |
||
| 1260 | 1260 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1261 | 1261 | or U+0020 SPACE |
| 1262 | 1262 | An end-of-file token */ |
| 1263 | - if((isset($token['error']) && $token['error']) || |
|
| 1263 | + if ((isset($token['error']) && $token['error']) || |
|
| 1264 | 1264 | $token['type'] === HTML5::COMMENT || |
| 1265 | 1265 | $token['type'] === HTML5::STARTTAG || |
| 1266 | 1266 | $token['type'] === HTML5::ENDTAG || |
@@ -1276,7 +1276,7 @@ discard block |
||
| 1276 | 1276 | return $this->rootElementPhase($token); |
| 1277 | 1277 | |
| 1278 | 1278 | /* A DOCTYPE token marked as being correct */ |
| 1279 | - } elseif(isset($token['error']) && !$token['error']) { |
|
| 1279 | + } elseif (isset($token['error']) && !$token['error']) { |
|
| 1280 | 1280 | /* Append a DocumentType node to the Document node, with the name |
| 1281 | 1281 | attribute set to the name given in the DOCTYPE token (which will be |
| 1282 | 1282 | "HTML"), and the other attributes specific to DocumentType objects |
@@ -1290,7 +1290,7 @@ discard block |
||
| 1290 | 1290 | /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
| 1291 | 1291 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1292 | 1292 | or U+0020 SPACE */ |
| 1293 | - } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', |
|
| 1293 | + } elseif (isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', |
|
| 1294 | 1294 | $token['data'])) { |
| 1295 | 1295 | /* Append that character to the Document node. */ |
| 1296 | 1296 | $text = $this->dom->createTextNode($token['data']); |
@@ -1303,11 +1303,11 @@ discard block |
||
| 1303 | 1303 | stage, it must be processed as described in this section. */ |
| 1304 | 1304 | |
| 1305 | 1305 | /* A DOCTYPE token */ |
| 1306 | - if($token['type'] === HTML5::DOCTYPE) { |
|
| 1306 | + if ($token['type'] === HTML5::DOCTYPE) { |
|
| 1307 | 1307 | // Parse error. Ignore the token. |
| 1308 | 1308 | |
| 1309 | 1309 | /* A comment token */ |
| 1310 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1310 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 1311 | 1311 | /* Append a Comment node to the Document object with the data |
| 1312 | 1312 | attribute set to the data given in the comment token. */ |
| 1313 | 1313 | $comment = $this->dom->createComment($token['data']); |
@@ -1316,7 +1316,7 @@ discard block |
||
| 1316 | 1316 | /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
| 1317 | 1317 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1318 | 1318 | or U+0020 SPACE */ |
| 1319 | - } elseif($token['type'] === HTML5::CHARACTR && |
|
| 1319 | + } elseif ($token['type'] === HTML5::CHARACTR && |
|
| 1320 | 1320 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 1321 | 1321 | /* Append that character to the Document node. */ |
| 1322 | 1322 | $text = $this->dom->createTextNode($token['data']); |
@@ -1328,7 +1328,7 @@ discard block |
||
| 1328 | 1328 | A start tag token |
| 1329 | 1329 | An end tag token |
| 1330 | 1330 | An end-of-file token */ |
| 1331 | - } elseif(($token['type'] === HTML5::CHARACTR && |
|
| 1331 | + } elseif (($token['type'] === HTML5::CHARACTR && |
|
| 1332 | 1332 | !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || |
| 1333 | 1333 | $token['type'] === HTML5::STARTTAG || |
| 1334 | 1334 | $token['type'] === HTML5::ENDTAG || |
@@ -1349,11 +1349,11 @@ discard block |
||
| 1349 | 1349 | /* Tokens in the main phase must be handled as follows: */ |
| 1350 | 1350 | |
| 1351 | 1351 | /* A DOCTYPE token */ |
| 1352 | - if($token['type'] === HTML5::DOCTYPE) { |
|
| 1352 | + if ($token['type'] === HTML5::DOCTYPE) { |
|
| 1353 | 1353 | // Parse error. Ignore the token. |
| 1354 | 1354 | |
| 1355 | 1355 | /* A start tag token with the tag name "html" */ |
| 1356 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { |
|
| 1356 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { |
|
| 1357 | 1357 | /* If this start tag token was not the first start tag token, then |
| 1358 | 1358 | it is a parse error. */ |
| 1359 | 1359 | |
@@ -1361,21 +1361,21 @@ discard block |
||
| 1361 | 1361 | is already present on the top element of the stack of open elements. |
| 1362 | 1362 | If it is not, add the attribute and its corresponding value to that |
| 1363 | 1363 | element. */ |
| 1364 | - foreach($token['attr'] as $attr) { |
|
| 1365 | - if(!$this->stack[0]->hasAttribute($attr['name'])) { |
|
| 1364 | + foreach ($token['attr'] as $attr) { |
|
| 1365 | + if (!$this->stack[0]->hasAttribute($attr['name'])) { |
|
| 1366 | 1366 | $this->stack[0]->setAttribute($attr['name'], $attr['value']); |
| 1367 | 1367 | } |
| 1368 | 1368 | } |
| 1369 | 1369 | |
| 1370 | 1370 | /* An end-of-file token */ |
| 1371 | - } elseif($token['type'] === HTML5::EOF) { |
|
| 1371 | + } elseif ($token['type'] === HTML5::EOF) { |
|
| 1372 | 1372 | /* Generate implied end tags. */ |
| 1373 | 1373 | $this->generateImpliedEndTags(); |
| 1374 | 1374 | |
| 1375 | 1375 | /* Anything else. */ |
| 1376 | 1376 | } else { |
| 1377 | 1377 | /* Depends on the insertion mode: */ |
| 1378 | - switch($this->mode) { |
|
| 1378 | + switch ($this->mode) { |
|
| 1379 | 1379 | case self::BEFOR_HEAD: return $this->beforeHead($token); break; |
| 1380 | 1380 | case self::IN_HEAD: return $this->inHead($token); break; |
| 1381 | 1381 | case self::AFTER_HEAD: return $this->afterHead($token); break; |
@@ -1401,19 +1401,19 @@ discard block |
||
| 1401 | 1401 | /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
| 1402 | 1402 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1403 | 1403 | or U+0020 SPACE */ |
| 1404 | - if($token['type'] === HTML5::CHARACTR && |
|
| 1404 | + if ($token['type'] === HTML5::CHARACTR && |
|
| 1405 | 1405 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 1406 | 1406 | /* Append the character to the current node. */ |
| 1407 | 1407 | $this->insertText($token['data']); |
| 1408 | 1408 | |
| 1409 | 1409 | /* A comment token */ |
| 1410 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1410 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 1411 | 1411 | /* Append a Comment node to the current node with the data attribute |
| 1412 | 1412 | set to the data given in the comment token. */ |
| 1413 | 1413 | $this->insertComment($token['data']); |
| 1414 | 1414 | |
| 1415 | 1415 | /* A start tag token with the tag name "head" */ |
| 1416 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { |
|
| 1416 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { |
|
| 1417 | 1417 | /* Create an element for the token, append the new element to the |
| 1418 | 1418 | current node and push it onto the stack of open elements. */ |
| 1419 | 1419 | $element = $this->insertElement($token); |
@@ -1429,7 +1429,7 @@ discard block |
||
| 1429 | 1429 | Or a character token that is not one of U+0009 CHARACTER TABULATION, |
| 1430 | 1430 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1431 | 1431 | or U+0020 SPACE. Or any other start tag token */ |
| 1432 | - } elseif($token['type'] === HTML5::STARTTAG || |
|
| 1432 | + } elseif ($token['type'] === HTML5::STARTTAG || |
|
| 1433 | 1433 | ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || |
| 1434 | 1434 | ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', |
| 1435 | 1435 | $token['data']))) { |
@@ -1444,7 +1444,7 @@ discard block |
||
| 1444 | 1444 | return $this->inHead($token); |
| 1445 | 1445 | |
| 1446 | 1446 | /* Any other end tag */ |
| 1447 | - } elseif($token['type'] === HTML5::ENDTAG) { |
|
| 1447 | + } elseif ($token['type'] === HTML5::ENDTAG) { |
|
| 1448 | 1448 | /* Parse error. Ignore the token. */ |
| 1449 | 1449 | } |
| 1450 | 1450 | } |
@@ -1459,7 +1459,7 @@ discard block |
||
| 1459 | 1459 | THIS DIFFERS FROM THE SPEC: If the current node is either a title, style |
| 1460 | 1460 | or script element, append the character to the current node regardless |
| 1461 | 1461 | of its content. */ |
| 1462 | - if(($token['type'] === HTML5::CHARACTR && |
|
| 1462 | + if (($token['type'] === HTML5::CHARACTR && |
|
| 1463 | 1463 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( |
| 1464 | 1464 | $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, |
| 1465 | 1465 | array('title', 'style', 'script')))) { |
@@ -1467,22 +1467,22 @@ discard block |
||
| 1467 | 1467 | $this->insertText($token['data']); |
| 1468 | 1468 | |
| 1469 | 1469 | /* A comment token */ |
| 1470 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1470 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 1471 | 1471 | /* Append a Comment node to the current node with the data attribute |
| 1472 | 1472 | set to the data given in the comment token. */ |
| 1473 | 1473 | $this->insertComment($token['data']); |
| 1474 | 1474 | |
| 1475 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 1475 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 1476 | 1476 | in_array($token['name'], array('title', 'style', 'script'))) { |
| 1477 | 1477 | array_pop($this->stack); |
| 1478 | 1478 | return HTML5::PCDATA; |
| 1479 | 1479 | |
| 1480 | 1480 | /* A start tag with the tag name "title" */ |
| 1481 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { |
|
| 1481 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { |
|
| 1482 | 1482 | /* Create an element for the token and append the new element to the |
| 1483 | 1483 | node pointed to by the head element pointer, or, if that is null |
| 1484 | 1484 | (innerHTML case), to the current node. */ |
| 1485 | - if($this->head_pointer !== null) { |
|
| 1485 | + if ($this->head_pointer !== null) { |
|
| 1486 | 1486 | $element = $this->insertElement($token, false); |
| 1487 | 1487 | $this->head_pointer->appendChild($element); |
| 1488 | 1488 | |
@@ -1494,11 +1494,11 @@ discard block |
||
| 1494 | 1494 | return HTML5::RCDATA; |
| 1495 | 1495 | |
| 1496 | 1496 | /* A start tag with the tag name "style" */ |
| 1497 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { |
|
| 1497 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { |
|
| 1498 | 1498 | /* Create an element for the token and append the new element to the |
| 1499 | 1499 | node pointed to by the head element pointer, or, if that is null |
| 1500 | 1500 | (innerHTML case), to the current node. */ |
| 1501 | - if($this->head_pointer !== null) { |
|
| 1501 | + if ($this->head_pointer !== null) { |
|
| 1502 | 1502 | $element = $this->insertElement($token, false); |
| 1503 | 1503 | $this->head_pointer->appendChild($element); |
| 1504 | 1504 | |
@@ -1510,7 +1510,7 @@ discard block |
||
| 1510 | 1510 | return HTML5::CDATA; |
| 1511 | 1511 | |
| 1512 | 1512 | /* A start tag with the tag name "script" */ |
| 1513 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { |
|
| 1513 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { |
|
| 1514 | 1514 | /* Create an element for the token. */ |
| 1515 | 1515 | $element = $this->insertElement($token, false); |
| 1516 | 1516 | $this->head_pointer->appendChild($element); |
@@ -1519,12 +1519,12 @@ discard block |
||
| 1519 | 1519 | return HTML5::CDATA; |
| 1520 | 1520 | |
| 1521 | 1521 | /* A start tag with the tag name "base", "link", or "meta" */ |
| 1522 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1522 | + } elseif ($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1523 | 1523 | array('base', 'link', 'meta'))) { |
| 1524 | 1524 | /* Create an element for the token and append the new element to the |
| 1525 | 1525 | node pointed to by the head element pointer, or, if that is null |
| 1526 | 1526 | (innerHTML case), to the current node. */ |
| 1527 | - if($this->head_pointer !== null) { |
|
| 1527 | + if ($this->head_pointer !== null) { |
|
| 1528 | 1528 | $element = $this->insertElement($token, false); |
| 1529 | 1529 | $this->head_pointer->appendChild($element); |
| 1530 | 1530 | array_pop($this->stack); |
@@ -1534,10 +1534,10 @@ discard block |
||
| 1534 | 1534 | } |
| 1535 | 1535 | |
| 1536 | 1536 | /* An end tag with the tag name "head" */ |
| 1537 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { |
|
| 1537 | + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { |
|
| 1538 | 1538 | /* If the current node is a head element, pop the current node off |
| 1539 | 1539 | the stack of open elements. */ |
| 1540 | - if($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1540 | + if ($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1541 | 1541 | array_pop($this->stack); |
| 1542 | 1542 | |
| 1543 | 1543 | /* Otherwise, this is a parse error. */ |
@@ -1549,7 +1549,7 @@ discard block |
||
| 1549 | 1549 | $this->mode = self::AFTER_HEAD; |
| 1550 | 1550 | |
| 1551 | 1551 | /* A start tag with the tag name "head" or an end tag except "html". */ |
| 1552 | - } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || |
|
| 1552 | + } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || |
|
| 1553 | 1553 | ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { |
| 1554 | 1554 | // Parse error. Ignore the token. |
| 1555 | 1555 | |
@@ -1557,7 +1557,7 @@ discard block |
||
| 1557 | 1557 | } else { |
| 1558 | 1558 | /* If the current node is a head element, act as if an end tag |
| 1559 | 1559 | token with the tag name "head" had been seen. */ |
| 1560 | - if($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1560 | + if ($this->head_pointer->isSameNode(end($this->stack))) { |
|
| 1561 | 1561 | $this->inHead(array( |
| 1562 | 1562 | 'name' => 'head', |
| 1563 | 1563 | 'type' => HTML5::ENDTAG |
@@ -1579,19 +1579,19 @@ discard block |
||
| 1579 | 1579 | /* A character token that is one of one of U+0009 CHARACTER TABULATION, |
| 1580 | 1580 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 1581 | 1581 | or U+0020 SPACE */ |
| 1582 | - if($token['type'] === HTML5::CHARACTR && |
|
| 1582 | + if ($token['type'] === HTML5::CHARACTR && |
|
| 1583 | 1583 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 1584 | 1584 | /* Append the character to the current node. */ |
| 1585 | 1585 | $this->insertText($token['data']); |
| 1586 | 1586 | |
| 1587 | 1587 | /* A comment token */ |
| 1588 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 1588 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 1589 | 1589 | /* Append a Comment node to the current node with the data attribute |
| 1590 | 1590 | set to the data given in the comment token. */ |
| 1591 | 1591 | $this->insertComment($token['data']); |
| 1592 | 1592 | |
| 1593 | 1593 | /* A start tag token with the tag name "body" */ |
| 1594 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { |
|
| 1594 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { |
|
| 1595 | 1595 | /* Insert a body element for the token. */ |
| 1596 | 1596 | $this->insertElement($token); |
| 1597 | 1597 | |
@@ -1599,7 +1599,7 @@ discard block |
||
| 1599 | 1599 | $this->mode = self::IN_BODY; |
| 1600 | 1600 | |
| 1601 | 1601 | /* A start tag token with the tag name "frameset" */ |
| 1602 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { |
|
| 1602 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { |
|
| 1603 | 1603 | /* Insert a frameset element for the token. */ |
| 1604 | 1604 | $this->insertElement($token); |
| 1605 | 1605 | |
@@ -1608,7 +1608,7 @@ discard block |
||
| 1608 | 1608 | |
| 1609 | 1609 | /* A start tag token whose tag name is one of: "base", "link", "meta", |
| 1610 | 1610 | "script", "style", "title" */ |
| 1611 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1611 | + } elseif ($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 1612 | 1612 | array('base', 'link', 'meta', 'script', 'style', 'title'))) { |
| 1613 | 1613 | /* Parse error. Switch the insertion mode back to "in head" and |
| 1614 | 1614 | reprocess the token. */ |
@@ -1632,7 +1632,7 @@ discard block |
||
| 1632 | 1632 | private function inBody($token) { |
| 1633 | 1633 | /* Handle the token as follows: */ |
| 1634 | 1634 | |
| 1635 | - switch($token['type']) { |
|
| 1635 | + switch ($token['type']) { |
|
| 1636 | 1636 | /* A character token */ |
| 1637 | 1637 | case HTML5::CHARACTR: |
| 1638 | 1638 | /* Reconstruct the active formatting elements, if any. */ |
@@ -1650,7 +1650,7 @@ discard block |
||
| 1650 | 1650 | break; |
| 1651 | 1651 | |
| 1652 | 1652 | case HTML5::STARTTAG: |
| 1653 | - switch($token['name']) { |
|
| 1653 | + switch ($token['name']) { |
|
| 1654 | 1654 | /* A start tag token whose tag name is one of: "script", |
| 1655 | 1655 | "style" */ |
| 1656 | 1656 | case 'script': case 'style': |
@@ -1673,7 +1673,7 @@ discard block |
||
| 1673 | 1673 | elements is not a body element, or, if the stack of open |
| 1674 | 1674 | elements has only one node on it, then ignore the token. |
| 1675 | 1675 | (innerHTML case) */ |
| 1676 | - if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { |
|
| 1676 | + if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { |
|
| 1677 | 1677 | // Ignore |
| 1678 | 1678 | |
| 1679 | 1679 | /* Otherwise, for each attribute on the token, check to see |
@@ -1682,8 +1682,8 @@ discard block |
||
| 1682 | 1682 | add the attribute and its corresponding value to that |
| 1683 | 1683 | element. */ |
| 1684 | 1684 | } else { |
| 1685 | - foreach($token['attr'] as $attr) { |
|
| 1686 | - if(!$this->stack[1]->hasAttribute($attr['name'])) { |
|
| 1685 | + foreach ($token['attr'] as $attr) { |
|
| 1686 | + if (!$this->stack[1]->hasAttribute($attr['name'])) { |
|
| 1687 | 1687 | $this->stack[1]->setAttribute($attr['name'], $attr['value']); |
| 1688 | 1688 | } |
| 1689 | 1689 | } |
@@ -1699,7 +1699,7 @@ discard block |
||
| 1699 | 1699 | /* If the stack of open elements has a p element in scope, |
| 1700 | 1700 | then act as if an end tag with the tag name p had been |
| 1701 | 1701 | seen. */ |
| 1702 | - if($this->elementInScope('p')) { |
|
| 1702 | + if ($this->elementInScope('p')) { |
|
| 1703 | 1703 | $this->emitToken(array( |
| 1704 | 1704 | 'name' => 'p', |
| 1705 | 1705 | 'type' => HTML5::ENDTAG |
@@ -1714,7 +1714,7 @@ discard block |
||
| 1714 | 1714 | case 'form': |
| 1715 | 1715 | /* If the form element pointer is not null, ignore the |
| 1716 | 1716 | token with a parse error. */ |
| 1717 | - if($this->form_pointer !== null) { |
|
| 1717 | + if ($this->form_pointer !== null) { |
|
| 1718 | 1718 | // Ignore. |
| 1719 | 1719 | |
| 1720 | 1720 | /* Otherwise: */ |
@@ -1722,7 +1722,7 @@ discard block |
||
| 1722 | 1722 | /* If the stack of open elements has a p element in |
| 1723 | 1723 | scope, then act as if an end tag with the tag name p |
| 1724 | 1724 | had been seen. */ |
| 1725 | - if($this->elementInScope('p')) { |
|
| 1725 | + if ($this->elementInScope('p')) { |
|
| 1726 | 1726 | $this->emitToken(array( |
| 1727 | 1727 | 'name' => 'p', |
| 1728 | 1728 | 'type' => HTML5::ENDTAG |
@@ -1741,7 +1741,7 @@ discard block |
||
| 1741 | 1741 | /* If the stack of open elements has a p element in scope, |
| 1742 | 1742 | then act as if an end tag with the tag name p had been |
| 1743 | 1743 | seen. */ |
| 1744 | - if($this->elementInScope('p')) { |
|
| 1744 | + if ($this->elementInScope('p')) { |
|
| 1745 | 1745 | $this->emitToken(array( |
| 1746 | 1746 | 'name' => 'p', |
| 1747 | 1747 | 'type' => HTML5::ENDTAG |
@@ -1750,7 +1750,7 @@ discard block |
||
| 1750 | 1750 | |
| 1751 | 1751 | $stack_length = count($this->stack) - 1; |
| 1752 | 1752 | |
| 1753 | - for($n = $stack_length; 0 <= $n; $n--) { |
|
| 1753 | + for ($n = $stack_length; 0 <= $n; $n--) { |
|
| 1754 | 1754 | /* 1. Initialise node to be the current node (the |
| 1755 | 1755 | bottommost node of the stack). */ |
| 1756 | 1756 | $stop = false; |
@@ -1760,9 +1760,9 @@ discard block |
||
| 1760 | 1760 | /* 2. If node is an li, dd or dt element, then pop all |
| 1761 | 1761 | the nodes from the current node up to node, including |
| 1762 | 1762 | node, then stop this algorithm. */ |
| 1763 | - if($token['name'] === $node->tagName || ($token['name'] !== 'li' |
|
| 1763 | + if ($token['name'] === $node->tagName || ($token['name'] !== 'li' |
|
| 1764 | 1764 | && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { |
| 1765 | - for($x = $stack_length; $x >= $n ; $x--) { |
|
| 1765 | + for ($x = $stack_length; $x >= $n; $x--) { |
|
| 1766 | 1766 | array_pop($this->stack); |
| 1767 | 1767 | } |
| 1768 | 1768 | |
@@ -1772,7 +1772,7 @@ discard block |
||
| 1772 | 1772 | /* 3. If node is not in the formatting category, and is |
| 1773 | 1773 | not in the phrasing category, and is not an address or |
| 1774 | 1774 | div element, then stop this algorithm. */ |
| 1775 | - if($cat !== self::FORMATTING && $cat !== self::PHRASING && |
|
| 1775 | + if ($cat !== self::FORMATTING && $cat !== self::PHRASING && |
|
| 1776 | 1776 | $node->tagName !== 'address' && $node->tagName !== 'div') { |
| 1777 | 1777 | break; |
| 1778 | 1778 | } |
@@ -1788,7 +1788,7 @@ discard block |
||
| 1788 | 1788 | /* If the stack of open elements has a p element in scope, |
| 1789 | 1789 | then act as if an end tag with the tag name p had been |
| 1790 | 1790 | seen. */ |
| 1791 | - if($this->elementInScope('p')) { |
|
| 1791 | + if ($this->elementInScope('p')) { |
|
| 1792 | 1792 | $this->emitToken(array( |
| 1793 | 1793 | 'name' => 'p', |
| 1794 | 1794 | 'type' => HTML5::ENDTAG |
@@ -1806,7 +1806,7 @@ discard block |
||
| 1806 | 1806 | case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': |
| 1807 | 1807 | /* If the stack of open elements has a p element in scope, |
| 1808 | 1808 | then act as if an end tag with the tag name p had been seen. */ |
| 1809 | - if($this->elementInScope('p')) { |
|
| 1809 | + if ($this->elementInScope('p')) { |
|
| 1810 | 1810 | $this->emitToken(array( |
| 1811 | 1811 | 'name' => 'p', |
| 1812 | 1812 | 'type' => HTML5::ENDTAG |
@@ -1818,7 +1818,7 @@ discard block |
||
| 1818 | 1818 | this is a parse error; pop elements from the stack until an |
| 1819 | 1819 | element with one of those tag names has been popped from the |
| 1820 | 1820 | stack. */ |
| 1821 | - while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { |
|
| 1821 | + while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { |
|
| 1822 | 1822 | array_pop($this->stack); |
| 1823 | 1823 | } |
| 1824 | 1824 | |
@@ -1839,11 +1839,11 @@ discard block |
||
| 1839 | 1839 | might not have if the element is not in table scope). */ |
| 1840 | 1840 | $leng = count($this->a_formatting); |
| 1841 | 1841 | |
| 1842 | - for($n = $leng - 1; $n >= 0; $n--) { |
|
| 1843 | - if($this->a_formatting[$n] === self::MARKER) { |
|
| 1842 | + for ($n = $leng - 1; $n >= 0; $n--) { |
|
| 1843 | + if ($this->a_formatting[$n] === self::MARKER) { |
|
| 1844 | 1844 | break; |
| 1845 | 1845 | |
| 1846 | - } elseif($this->a_formatting[$n]->nodeName === 'a') { |
|
| 1846 | + } elseif ($this->a_formatting[$n]->nodeName === 'a') { |
|
| 1847 | 1847 | $this->emitToken(array( |
| 1848 | 1848 | 'name' => 'a', |
| 1849 | 1849 | 'type' => HTML5::ENDTAG |
@@ -1885,7 +1885,7 @@ discard block |
||
| 1885 | 1885 | then this is a parse error; act as if an end tag with the tag |
| 1886 | 1886 | name "button" had been seen, then reprocess the token. (We don't |
| 1887 | 1887 | do that. Unnecessary.) */ |
| 1888 | - if($this->elementInScope('button')) { |
|
| 1888 | + if ($this->elementInScope('button')) { |
|
| 1889 | 1889 | $this->inBody(array( |
| 1890 | 1890 | 'name' => 'button', |
| 1891 | 1891 | 'type' => HTML5::ENDTAG |
@@ -1932,7 +1932,7 @@ discard block |
||
| 1932 | 1932 | case 'table': |
| 1933 | 1933 | /* If the stack of open elements has a p element in scope, |
| 1934 | 1934 | then act as if an end tag with the tag name p had been seen. */ |
| 1935 | - if($this->elementInScope('p')) { |
|
| 1935 | + if ($this->elementInScope('p')) { |
|
| 1936 | 1936 | $this->emitToken(array( |
| 1937 | 1937 | 'name' => 'p', |
| 1938 | 1938 | 'type' => HTML5::ENDTAG |
@@ -1965,7 +1965,7 @@ discard block |
||
| 1965 | 1965 | case 'hr': |
| 1966 | 1966 | /* If the stack of open elements has a p element in scope, |
| 1967 | 1967 | then act as if an end tag with the tag name p had been seen. */ |
| 1968 | - if($this->elementInScope('p')) { |
|
| 1968 | + if ($this->elementInScope('p')) { |
|
| 1969 | 1969 | $this->emitToken(array( |
| 1970 | 1970 | 'name' => 'p', |
| 1971 | 1971 | 'type' => HTML5::ENDTAG |
@@ -2013,7 +2013,7 @@ discard block |
||
| 2013 | 2013 | |
| 2014 | 2014 | /* If the form element pointer is not null, |
| 2015 | 2015 | then ignore the token. */ |
| 2016 | - if($this->form_pointer === null) { |
|
| 2016 | + if ($this->form_pointer === null) { |
|
| 2017 | 2017 | /* Act as if a start tag token with the tag name "form" had |
| 2018 | 2018 | been seen. */ |
| 2019 | 2019 | $this->inBody(array( |
@@ -2158,18 +2158,18 @@ discard block |
||
| 2158 | 2158 | break; |
| 2159 | 2159 | |
| 2160 | 2160 | case HTML5::ENDTAG: |
| 2161 | - switch($token['name']) { |
|
| 2161 | + switch ($token['name']) { |
|
| 2162 | 2162 | /* An end tag with the tag name "body" */ |
| 2163 | 2163 | case 'body': |
| 2164 | 2164 | /* If the second element in the stack of open elements is |
| 2165 | 2165 | not a body element, this is a parse error. Ignore the token. |
| 2166 | 2166 | (innerHTML case) */ |
| 2167 | - if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { |
|
| 2167 | + if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { |
|
| 2168 | 2168 | // Ignore. |
| 2169 | 2169 | |
| 2170 | 2170 | /* If the current node is not the body element, then this |
| 2171 | 2171 | is a parse error. */ |
| 2172 | - } elseif(end($this->stack)->nodeName !== 'body') { |
|
| 2172 | + } elseif (end($this->stack)->nodeName !== 'body') { |
|
| 2173 | 2173 | // Parse error. |
| 2174 | 2174 | } |
| 2175 | 2175 | |
@@ -2199,7 +2199,7 @@ discard block |
||
| 2199 | 2199 | /* If the stack of open elements has an element in scope |
| 2200 | 2200 | with the same tag name as that of the token, then generate |
| 2201 | 2201 | implied end tags. */ |
| 2202 | - if($this->elementInScope($token['name'])) { |
|
| 2202 | + if ($this->elementInScope($token['name'])) { |
|
| 2203 | 2203 | $this->generateImpliedEndTags(); |
| 2204 | 2204 | |
| 2205 | 2205 | /* Now, if the current node is not an element with |
@@ -2211,8 +2211,8 @@ discard block |
||
| 2211 | 2211 | scope with the same tag name as that of the token, |
| 2212 | 2212 | then pop elements from this stack until an element |
| 2213 | 2213 | with that tag name has been popped from the stack. */ |
| 2214 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2215 | - if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2214 | + for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2215 | + if ($this->stack[$n]->nodeName === $token['name']) { |
|
| 2216 | 2216 | $n = -1; |
| 2217 | 2217 | } |
| 2218 | 2218 | |
@@ -2226,12 +2226,12 @@ discard block |
||
| 2226 | 2226 | /* If the stack of open elements has an element in scope |
| 2227 | 2227 | with the same tag name as that of the token, then generate |
| 2228 | 2228 | implied end tags. */ |
| 2229 | - if($this->elementInScope($token['name'])) { |
|
| 2229 | + if ($this->elementInScope($token['name'])) { |
|
| 2230 | 2230 | $this->generateImpliedEndTags(); |
| 2231 | 2231 | |
| 2232 | 2232 | } |
| 2233 | 2233 | |
| 2234 | - if(end($this->stack)->nodeName !== $token['name']) { |
|
| 2234 | + if (end($this->stack)->nodeName !== $token['name']) { |
|
| 2235 | 2235 | /* Now, if the current node is not an element with the |
| 2236 | 2236 | same tag name as that of the token, then this is a parse |
| 2237 | 2237 | error. */ |
@@ -2252,7 +2252,7 @@ discard block |
||
| 2252 | 2252 | case 'p': |
| 2253 | 2253 | /* If the stack of open elements has a p element in scope, |
| 2254 | 2254 | then generate implied end tags, except for p elements. */ |
| 2255 | - if($this->elementInScope('p')) { |
|
| 2255 | + if ($this->elementInScope('p')) { |
|
| 2256 | 2256 | $this->generateImpliedEndTags(array('p')); |
| 2257 | 2257 | |
| 2258 | 2258 | /* If the current node is not a p element, then this is |
@@ -2262,8 +2262,8 @@ discard block |
||
| 2262 | 2262 | /* If the stack of open elements has a p element in |
| 2263 | 2263 | scope, then pop elements from this stack until the stack |
| 2264 | 2264 | no longer has a p element in scope. */ |
| 2265 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2266 | - if($this->elementInScope('p')) { |
|
| 2265 | + for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2266 | + if ($this->elementInScope('p')) { |
|
| 2267 | 2267 | array_pop($this->stack); |
| 2268 | 2268 | |
| 2269 | 2269 | } else { |
@@ -2279,7 +2279,7 @@ discard block |
||
| 2279 | 2279 | whose tag name matches the tag name of the token, then |
| 2280 | 2280 | generate implied end tags, except for elements with the |
| 2281 | 2281 | same tag name as the token. */ |
| 2282 | - if($this->elementInScope($token['name'])) { |
|
| 2282 | + if ($this->elementInScope($token['name'])) { |
|
| 2283 | 2283 | $this->generateImpliedEndTags(array($token['name'])); |
| 2284 | 2284 | |
| 2285 | 2285 | /* If the current node is not an element with the same |
@@ -2290,8 +2290,8 @@ discard block |
||
| 2290 | 2290 | whose tag name matches the tag name of the token, then |
| 2291 | 2291 | pop elements from this stack until an element with that |
| 2292 | 2292 | tag name has been popped from the stack. */ |
| 2293 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2294 | - if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2293 | + for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2294 | + if ($this->stack[$n]->nodeName === $token['name']) { |
|
| 2295 | 2295 | $n = -1; |
| 2296 | 2296 | } |
| 2297 | 2297 | |
@@ -2308,7 +2308,7 @@ discard block |
||
| 2308 | 2308 | /* If the stack of open elements has in scope an element whose |
| 2309 | 2309 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then |
| 2310 | 2310 | generate implied end tags. */ |
| 2311 | - if($this->elementInScope($elements)) { |
|
| 2311 | + if ($this->elementInScope($elements)) { |
|
| 2312 | 2312 | $this->generateImpliedEndTags(); |
| 2313 | 2313 | |
| 2314 | 2314 | /* Now, if the current node is not an element with the same |
@@ -2319,7 +2319,7 @@ discard block |
||
| 2319 | 2319 | whose tag name is one of "h1", "h2", "h3", "h4", "h5", or |
| 2320 | 2320 | "h6", then pop elements from the stack until an element |
| 2321 | 2321 | with one of those tag names has been popped from the stack. */ |
| 2322 | - while($this->elementInScope($elements)) { |
|
| 2322 | + while ($this->elementInScope($elements)) { |
|
| 2323 | 2323 | array_pop($this->stack); |
| 2324 | 2324 | } |
| 2325 | 2325 | } |
@@ -2337,12 +2337,12 @@ discard block |
||
| 2337 | 2337 | otherwise, and |
| 2338 | 2338 | * has the same tag name as the token. |
| 2339 | 2339 | */ |
| 2340 | - while(true) { |
|
| 2341 | - for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { |
|
| 2342 | - if($this->a_formatting[$a] === self::MARKER) { |
|
| 2340 | + while (true) { |
|
| 2341 | + for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) { |
|
| 2342 | + if ($this->a_formatting[$a] === self::MARKER) { |
|
| 2343 | 2343 | break; |
| 2344 | 2344 | |
| 2345 | - } elseif($this->a_formatting[$a]->tagName === $token['name']) { |
|
| 2345 | + } elseif ($this->a_formatting[$a]->tagName === $token['name']) { |
|
| 2346 | 2346 | $formatting_element = $this->a_formatting[$a]; |
| 2347 | 2347 | $in_stack = in_array($formatting_element, $this->stack, true); |
| 2348 | 2348 | $fe_af_pos = $a; |
@@ -2354,7 +2354,7 @@ discard block |
||
| 2354 | 2354 | also in the stack of open elements but the element |
| 2355 | 2355 | is not in scope, then this is a parse error. Abort |
| 2356 | 2356 | these steps. The token is ignored. */ |
| 2357 | - if(!isset($formatting_element) || ($in_stack && |
|
| 2357 | + if (!isset($formatting_element) || ($in_stack && |
|
| 2358 | 2358 | !$this->elementInScope($token['name']))) { |
| 2359 | 2359 | break; |
| 2360 | 2360 | |
@@ -2362,7 +2362,7 @@ discard block |
||
| 2362 | 2362 | is not in the stack of open elements, then this is a |
| 2363 | 2363 | parse error; remove the element from the list, and |
| 2364 | 2364 | abort these steps. */ |
| 2365 | - } elseif(isset($formatting_element) && !$in_stack) { |
|
| 2365 | + } elseif (isset($formatting_element) && !$in_stack) { |
|
| 2366 | 2366 | unset($this->a_formatting[$fe_af_pos]); |
| 2367 | 2367 | $this->a_formatting = array_merge($this->a_formatting); |
| 2368 | 2368 | break; |
@@ -2376,10 +2376,10 @@ discard block |
||
| 2376 | 2376 | $fe_s_pos = array_search($formatting_element, $this->stack, true); |
| 2377 | 2377 | $length = count($this->stack); |
| 2378 | 2378 | |
| 2379 | - for($s = $fe_s_pos + 1; $s < $length; $s++) { |
|
| 2379 | + for ($s = $fe_s_pos + 1; $s < $length; $s++) { |
|
| 2380 | 2380 | $category = $this->getElementCategory($this->stack[$s]->nodeName); |
| 2381 | 2381 | |
| 2382 | - if($category !== self::PHRASING && $category !== self::FORMATTING) { |
|
| 2382 | + if ($category !== self::PHRASING && $category !== self::FORMATTING) { |
|
| 2383 | 2383 | $furthest_block = $this->stack[$s]; |
| 2384 | 2384 | } |
| 2385 | 2385 | } |
@@ -2390,8 +2390,8 @@ discard block |
||
| 2390 | 2390 | elements, from the current node up to the formatting |
| 2391 | 2391 | element, and remove the formatting element from the |
| 2392 | 2392 | list of active formatting elements. */ |
| 2393 | - if(!isset($furthest_block)) { |
|
| 2394 | - for($n = $length - 1; $n >= $fe_s_pos; $n--) { |
|
| 2393 | + if (!isset($furthest_block)) { |
|
| 2394 | + for ($n = $length - 1; $n >= $fe_s_pos; $n--) { |
|
| 2395 | 2395 | array_pop($this->stack); |
| 2396 | 2396 | } |
| 2397 | 2397 | |
@@ -2407,7 +2407,7 @@ discard block |
||
| 2407 | 2407 | |
| 2408 | 2408 | /* 5. If the furthest block has a parent node, then |
| 2409 | 2409 | remove the furthest block from its parent node. */ |
| 2410 | - if($furthest_block->parentNode !== null) { |
|
| 2410 | + if ($furthest_block->parentNode !== null) { |
|
| 2411 | 2411 | $furthest_block->parentNode->removeChild($furthest_block); |
| 2412 | 2412 | } |
| 2413 | 2413 | |
@@ -2422,8 +2422,8 @@ discard block |
||
| 2422 | 2422 | $node = $furthest_block; |
| 2423 | 2423 | $last_node = $furthest_block; |
| 2424 | 2424 | |
| 2425 | - while(true) { |
|
| 2426 | - for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { |
|
| 2425 | + while (true) { |
|
| 2426 | + for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { |
|
| 2427 | 2427 | /* 7.1 Let node be the element immediately |
| 2428 | 2428 | prior to node in the stack of open elements. */ |
| 2429 | 2429 | $node = $this->stack[$n]; |
@@ -2432,7 +2432,7 @@ discard block |
||
| 2432 | 2432 | formatting elements, then remove node from |
| 2433 | 2433 | the stack of open elements and then go back |
| 2434 | 2434 | to step 1. */ |
| 2435 | - if(!in_array($node, $this->a_formatting, true)) { |
|
| 2435 | + if (!in_array($node, $this->a_formatting, true)) { |
|
| 2436 | 2436 | unset($this->stack[$n]); |
| 2437 | 2437 | $this->stack = array_merge($this->stack); |
| 2438 | 2438 | |
@@ -2444,14 +2444,14 @@ discard block |
||
| 2444 | 2444 | /* 7.3 Otherwise, if node is the formatting |
| 2445 | 2445 | element, then go to the next step in the overall |
| 2446 | 2446 | algorithm. */ |
| 2447 | - if($node === $formatting_element) { |
|
| 2447 | + if ($node === $formatting_element) { |
|
| 2448 | 2448 | break; |
| 2449 | 2449 | |
| 2450 | 2450 | /* 7.4 Otherwise, if last node is the furthest |
| 2451 | 2451 | block, then move the aforementioned bookmark to |
| 2452 | 2452 | be immediately after the node in the list of |
| 2453 | 2453 | active formatting elements. */ |
| 2454 | - } elseif($last_node === $furthest_block) { |
|
| 2454 | + } elseif ($last_node === $furthest_block) { |
|
| 2455 | 2455 | $bookmark = array_search($node, $this->a_formatting, true) + 1; |
| 2456 | 2456 | } |
| 2457 | 2457 | |
@@ -2461,7 +2461,7 @@ discard block |
||
| 2461 | 2461 | with an entry for the clone, replace the entry |
| 2462 | 2462 | for node in the stack of open elements with an |
| 2463 | 2463 | entry for the clone, and let node be the clone. */ |
| 2464 | - if($node->hasChildNodes()) { |
|
| 2464 | + if ($node->hasChildNodes()) { |
|
| 2465 | 2465 | $clone = $node->cloneNode(); |
| 2466 | 2466 | $s_pos = array_search($node, $this->stack, true); |
| 2467 | 2467 | $a_pos = array_search($node, $this->a_formatting, true); |
@@ -2473,7 +2473,7 @@ discard block |
||
| 2473 | 2473 | |
| 2474 | 2474 | /* 7.6 Insert last node into node, first removing |
| 2475 | 2475 | it from its previous parent node if any. */ |
| 2476 | - if($last_node->parentNode !== null) { |
|
| 2476 | + if ($last_node->parentNode !== null) { |
|
| 2477 | 2477 | $last_node->parentNode->removeChild($last_node); |
| 2478 | 2478 | } |
| 2479 | 2479 | |
@@ -2487,7 +2487,7 @@ discard block |
||
| 2487 | 2487 | the previous step into the common ancestor node, |
| 2488 | 2488 | first removing it from its previous parent node if |
| 2489 | 2489 | any. */ |
| 2490 | - if($last_node->parentNode !== null) { |
|
| 2490 | + if ($last_node->parentNode !== null) { |
|
| 2491 | 2491 | $last_node->parentNode->removeChild($last_node); |
| 2492 | 2492 | } |
| 2493 | 2493 | |
@@ -2500,7 +2500,7 @@ discard block |
||
| 2500 | 2500 | /* 10. Take all of the child nodes of the furthest |
| 2501 | 2501 | block and append them to the clone created in the |
| 2502 | 2502 | last step. */ |
| 2503 | - while($furthest_block->hasChildNodes()) { |
|
| 2503 | + while ($furthest_block->hasChildNodes()) { |
|
| 2504 | 2504 | $child = $furthest_block->firstChild; |
| 2505 | 2505 | $furthest_block->removeChild($child); |
| 2506 | 2506 | $clone->appendChild($child); |
@@ -2545,7 +2545,7 @@ discard block |
||
| 2545 | 2545 | /* If the stack of open elements has an element in scope whose |
| 2546 | 2546 | tag name matches the tag name of the token, then generate implied |
| 2547 | 2547 | end tags. */ |
| 2548 | - if($this->elementInScope($token['name'])) { |
|
| 2548 | + if ($this->elementInScope($token['name'])) { |
|
| 2549 | 2549 | $this->generateImpliedEndTags(); |
| 2550 | 2550 | |
| 2551 | 2551 | /* Now, if the current node is not an element with the same |
@@ -2557,8 +2557,8 @@ discard block |
||
| 2557 | 2557 | elements from the stack until that element has been popped from |
| 2558 | 2558 | the stack, and clear the list of active formatting elements up |
| 2559 | 2559 | to the last marker. */ |
| 2560 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2561 | - if($this->stack[$n]->nodeName === $token['name']) { |
|
| 2560 | + for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2561 | + if ($this->stack[$n]->nodeName === $token['name']) { |
|
| 2562 | 2562 | $n = -1; |
| 2563 | 2563 | } |
| 2564 | 2564 | |
@@ -2567,7 +2567,7 @@ discard block |
||
| 2567 | 2567 | |
| 2568 | 2568 | $marker = end(array_keys($this->a_formatting, self::MARKER, true)); |
| 2569 | 2569 | |
| 2570 | - for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { |
|
| 2570 | + for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) { |
|
| 2571 | 2571 | array_pop($this->a_formatting); |
| 2572 | 2572 | } |
| 2573 | 2573 | } |
@@ -2587,14 +2587,14 @@ discard block |
||
| 2587 | 2587 | |
| 2588 | 2588 | /* An end tag token not covered by the previous entries */ |
| 2589 | 2589 | default: |
| 2590 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2590 | + for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2591 | 2591 | /* Initialise node to be the current node (the bottommost |
| 2592 | 2592 | node of the stack). */ |
| 2593 | 2593 | $node = end($this->stack); |
| 2594 | 2594 | |
| 2595 | 2595 | /* If node has the same tag name as the end tag token, |
| 2596 | 2596 | then: */ |
| 2597 | - if($token['name'] === $node->nodeName) { |
|
| 2597 | + if ($token['name'] === $node->nodeName) { |
|
| 2598 | 2598 | /* Generate implied end tags. */ |
| 2599 | 2599 | $this->generateImpliedEndTags(); |
| 2600 | 2600 | |
@@ -2605,14 +2605,14 @@ discard block |
||
| 2605 | 2605 | |
| 2606 | 2606 | /* Pop all the nodes from the current node up to |
| 2607 | 2607 | node, including node, then stop this algorithm. */ |
| 2608 | - for($x = count($this->stack) - $n; $x >= $n; $x--) { |
|
| 2608 | + for ($x = count($this->stack) - $n; $x >= $n; $x--) { |
|
| 2609 | 2609 | array_pop($this->stack); |
| 2610 | 2610 | } |
| 2611 | 2611 | |
| 2612 | 2612 | } else { |
| 2613 | 2613 | $category = $this->getElementCategory($node); |
| 2614 | 2614 | |
| 2615 | - if($category !== self::SPECIAL && $category !== self::SCOPING) { |
|
| 2615 | + if ($category !== self::SPECIAL && $category !== self::SCOPING) { |
|
| 2616 | 2616 | /* Otherwise, if node is in neither the formatting |
| 2617 | 2617 | category nor the phrasing category, then this is a |
| 2618 | 2618 | parse error. Stop this algorithm. The end tag token |
@@ -2633,21 +2633,21 @@ discard block |
||
| 2633 | 2633 | /* A character token that is one of U+0009 CHARACTER TABULATION, |
| 2634 | 2634 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 2635 | 2635 | or U+0020 SPACE */ |
| 2636 | - if($token['type'] === HTML5::CHARACTR && |
|
| 2636 | + if ($token['type'] === HTML5::CHARACTR && |
|
| 2637 | 2637 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 2638 | 2638 | /* Append the character to the current node. */ |
| 2639 | 2639 | $text = $this->dom->createTextNode($token['data']); |
| 2640 | 2640 | end($this->stack)->appendChild($text); |
| 2641 | 2641 | |
| 2642 | 2642 | /* A comment token */ |
| 2643 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 2643 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 2644 | 2644 | /* Append a Comment node to the current node with the data |
| 2645 | 2645 | attribute set to the data given in the comment token. */ |
| 2646 | 2646 | $comment = $this->dom->createComment($token['data']); |
| 2647 | 2647 | end($this->stack)->appendChild($comment); |
| 2648 | 2648 | |
| 2649 | 2649 | /* A start tag whose tag name is "caption" */ |
| 2650 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2650 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 2651 | 2651 | $token['name'] === 'caption') { |
| 2652 | 2652 | /* Clear the stack back to a table context. */ |
| 2653 | 2653 | $this->clearStackToTableContext($clear); |
@@ -2662,7 +2662,7 @@ discard block |
||
| 2662 | 2662 | $this->mode = self::IN_CAPTION; |
| 2663 | 2663 | |
| 2664 | 2664 | /* A start tag whose tag name is "colgroup" */ |
| 2665 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2665 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 2666 | 2666 | $token['name'] === 'colgroup') { |
| 2667 | 2667 | /* Clear the stack back to a table context. */ |
| 2668 | 2668 | $this->clearStackToTableContext($clear); |
@@ -2673,7 +2673,7 @@ discard block |
||
| 2673 | 2673 | $this->mode = self::IN_CGROUP; |
| 2674 | 2674 | |
| 2675 | 2675 | /* A start tag whose tag name is "col" */ |
| 2676 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2676 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 2677 | 2677 | $token['name'] === 'col') { |
| 2678 | 2678 | $this->inTable(array( |
| 2679 | 2679 | 'name' => 'colgroup', |
@@ -2684,7 +2684,7 @@ discard block |
||
| 2684 | 2684 | $this->inColumnGroup($token); |
| 2685 | 2685 | |
| 2686 | 2686 | /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
| 2687 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2687 | + } elseif ($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2688 | 2688 | array('tbody', 'tfoot', 'thead'))) { |
| 2689 | 2689 | /* Clear the stack back to a table context. */ |
| 2690 | 2690 | $this->clearStackToTableContext($clear); |
@@ -2695,7 +2695,7 @@ discard block |
||
| 2695 | 2695 | $this->mode = self::IN_TBODY; |
| 2696 | 2696 | |
| 2697 | 2697 | /* A start tag whose tag name is one of: "td", "th", "tr" */ |
| 2698 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2698 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 2699 | 2699 | in_array($token['name'], array('td', 'th', 'tr'))) { |
| 2700 | 2700 | /* Act as if a start tag token with the tag name "tbody" had been |
| 2701 | 2701 | seen, then reprocess the current token. */ |
@@ -2708,7 +2708,7 @@ discard block |
||
| 2708 | 2708 | return $this->inTableBody($token); |
| 2709 | 2709 | |
| 2710 | 2710 | /* A start tag whose tag name is "table" */ |
| 2711 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2711 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 2712 | 2712 | $token['name'] === 'table') { |
| 2713 | 2713 | /* Parse error. Act as if an end tag token with the tag name "table" |
| 2714 | 2714 | had been seen, then, if that token wasn't ignored, reprocess the |
@@ -2721,12 +2721,12 @@ discard block |
||
| 2721 | 2721 | return $this->mainPhase($token); |
| 2722 | 2722 | |
| 2723 | 2723 | /* An end tag whose tag name is "table" */ |
| 2724 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2724 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 2725 | 2725 | $token['name'] === 'table') { |
| 2726 | 2726 | /* If the stack of open elements does not have an element in table |
| 2727 | 2727 | scope with the same tag name as the token, this is a parse error. |
| 2728 | 2728 | Ignore the token. (innerHTML case) */ |
| 2729 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 2729 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 2730 | 2730 | return false; |
| 2731 | 2731 | |
| 2732 | 2732 | /* Otherwise: */ |
@@ -2740,11 +2740,11 @@ discard block |
||
| 2740 | 2740 | |
| 2741 | 2741 | /* Pop elements from this stack until a table element has been |
| 2742 | 2742 | popped from the stack. */ |
| 2743 | - while(true) { |
|
| 2743 | + while (true) { |
|
| 2744 | 2744 | $current = end($this->stack)->nodeName; |
| 2745 | 2745 | array_pop($this->stack); |
| 2746 | 2746 | |
| 2747 | - if($current === 'table') { |
|
| 2747 | + if ($current === 'table') { |
|
| 2748 | 2748 | break; |
| 2749 | 2749 | } |
| 2750 | 2750 | } |
@@ -2755,7 +2755,7 @@ discard block |
||
| 2755 | 2755 | |
| 2756 | 2756 | /* An end tag whose tag name is one of: "body", "caption", "col", |
| 2757 | 2757 | "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 2758 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2758 | + } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2759 | 2759 | array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', |
| 2760 | 2760 | 'tfoot', 'th', 'thead', 'tr'))) { |
| 2761 | 2761 | // Parse error. Ignore the token. |
@@ -2768,7 +2768,7 @@ discard block |
||
| 2768 | 2768 | /* If the current node is a table, tbody, tfoot, thead, or tr |
| 2769 | 2769 | element, then, whenever a node would be inserted into the current |
| 2770 | 2770 | node, it must instead be inserted into the foster parent element. */ |
| 2771 | - if(in_array(end($this->stack)->nodeName, |
|
| 2771 | + if (in_array(end($this->stack)->nodeName, |
|
| 2772 | 2772 | array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { |
| 2773 | 2773 | /* The foster parent element is the parent element of the last |
| 2774 | 2774 | table element in the stack of open elements, if there is a |
@@ -2781,20 +2781,20 @@ discard block |
||
| 2781 | 2781 | its parent node is not an element, then the foster parent |
| 2782 | 2782 | element is the element before the last table element in the |
| 2783 | 2783 | stack of open elements. */ |
| 2784 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2785 | - if($this->stack[$n]->nodeName === 'table') { |
|
| 2784 | + for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 2785 | + if ($this->stack[$n]->nodeName === 'table') { |
|
| 2786 | 2786 | $table = $this->stack[$n]; |
| 2787 | 2787 | break; |
| 2788 | 2788 | } |
| 2789 | 2789 | } |
| 2790 | 2790 | |
| 2791 | - if(isset($table) && $table->parentNode !== null) { |
|
| 2791 | + if (isset($table) && $table->parentNode !== null) { |
|
| 2792 | 2792 | $this->foster_parent = $table->parentNode; |
| 2793 | 2793 | |
| 2794 | - } elseif(!isset($table)) { |
|
| 2794 | + } elseif (!isset($table)) { |
|
| 2795 | 2795 | $this->foster_parent = $this->stack[0]; |
| 2796 | 2796 | |
| 2797 | - } elseif(isset($table) && ($table->parentNode === null || |
|
| 2797 | + } elseif (isset($table) && ($table->parentNode === null || |
|
| 2798 | 2798 | $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { |
| 2799 | 2799 | $this->foster_parent = $this->stack[$n - 1]; |
| 2800 | 2800 | } |
@@ -2806,11 +2806,11 @@ discard block |
||
| 2806 | 2806 | |
| 2807 | 2807 | private function inCaption($token) { |
| 2808 | 2808 | /* An end tag whose tag name is "caption" */ |
| 2809 | - if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { |
|
| 2809 | + if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { |
|
| 2810 | 2810 | /* If the stack of open elements does not have an element in table |
| 2811 | 2811 | scope with the same tag name as the token, this is a parse error. |
| 2812 | 2812 | Ignore the token. (innerHTML case) */ |
| 2813 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 2813 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 2814 | 2814 | // Ignore |
| 2815 | 2815 | |
| 2816 | 2816 | /* Otherwise: */ |
@@ -2824,11 +2824,11 @@ discard block |
||
| 2824 | 2824 | |
| 2825 | 2825 | /* Pop elements from this stack until a caption element has |
| 2826 | 2826 | been popped from the stack. */ |
| 2827 | - while(true) { |
|
| 2827 | + while (true) { |
|
| 2828 | 2828 | $node = end($this->stack)->nodeName; |
| 2829 | 2829 | array_pop($this->stack); |
| 2830 | 2830 | |
| 2831 | - if($node === 'caption') { |
|
| 2831 | + if ($node === 'caption') { |
|
| 2832 | 2832 | break; |
| 2833 | 2833 | } |
| 2834 | 2834 | } |
@@ -2844,7 +2844,7 @@ discard block |
||
| 2844 | 2844 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
| 2845 | 2845 | "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag |
| 2846 | 2846 | name is "table" */ |
| 2847 | - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2847 | + } elseif (($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2848 | 2848 | array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
| 2849 | 2849 | 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && |
| 2850 | 2850 | $token['name'] === 'table')) { |
@@ -2860,7 +2860,7 @@ discard block |
||
| 2860 | 2860 | |
| 2861 | 2861 | /* An end tag whose tag name is one of: "body", "col", "colgroup", |
| 2862 | 2862 | "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 2863 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2863 | + } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 2864 | 2864 | array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', |
| 2865 | 2865 | 'thead', 'tr'))) { |
| 2866 | 2866 | // Parse error. Ignore the token. |
@@ -2876,32 +2876,32 @@ discard block |
||
| 2876 | 2876 | /* A character token that is one of U+0009 CHARACTER TABULATION, |
| 2877 | 2877 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 2878 | 2878 | or U+0020 SPACE */ |
| 2879 | - if($token['type'] === HTML5::CHARACTR && |
|
| 2879 | + if ($token['type'] === HTML5::CHARACTR && |
|
| 2880 | 2880 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 2881 | 2881 | /* Append the character to the current node. */ |
| 2882 | 2882 | $text = $this->dom->createTextNode($token['data']); |
| 2883 | 2883 | end($this->stack)->appendChild($text); |
| 2884 | 2884 | |
| 2885 | 2885 | /* A comment token */ |
| 2886 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 2886 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 2887 | 2887 | /* Append a Comment node to the current node with the data |
| 2888 | 2888 | attribute set to the data given in the comment token. */ |
| 2889 | 2889 | $comment = $this->dom->createComment($token['data']); |
| 2890 | 2890 | end($this->stack)->appendChild($comment); |
| 2891 | 2891 | |
| 2892 | 2892 | /* A start tag whose tag name is "col" */ |
| 2893 | - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { |
|
| 2893 | + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { |
|
| 2894 | 2894 | /* Insert a col element for the token. Immediately pop the current |
| 2895 | 2895 | node off the stack of open elements. */ |
| 2896 | 2896 | $this->insertElement($token); |
| 2897 | 2897 | array_pop($this->stack); |
| 2898 | 2898 | |
| 2899 | 2899 | /* An end tag whose tag name is "colgroup" */ |
| 2900 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2900 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 2901 | 2901 | $token['name'] === 'colgroup') { |
| 2902 | 2902 | /* If the current node is the root html element, then this is a |
| 2903 | 2903 | parse error, ignore the token. (innerHTML case) */ |
| 2904 | - if(end($this->stack)->nodeName === 'html') { |
|
| 2904 | + if (end($this->stack)->nodeName === 'html') { |
|
| 2905 | 2905 | // Ignore |
| 2906 | 2906 | |
| 2907 | 2907 | /* Otherwise, pop the current node (which will be a colgroup |
@@ -2913,7 +2913,7 @@ discard block |
||
| 2913 | 2913 | } |
| 2914 | 2914 | |
| 2915 | 2915 | /* An end tag whose tag name is "col" */ |
| 2916 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { |
|
| 2916 | + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { |
|
| 2917 | 2917 | /* Parse error. Ignore the token. */ |
| 2918 | 2918 | |
| 2919 | 2919 | /* Anything else */ |
@@ -2933,7 +2933,7 @@ discard block |
||
| 2933 | 2933 | $clear = array('tbody', 'tfoot', 'thead', 'html'); |
| 2934 | 2934 | |
| 2935 | 2935 | /* A start tag whose tag name is "tr" */ |
| 2936 | - if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { |
|
| 2936 | + if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { |
|
| 2937 | 2937 | /* Clear the stack back to a table body context. */ |
| 2938 | 2938 | $this->clearStackToTableContext($clear); |
| 2939 | 2939 | |
@@ -2943,8 +2943,8 @@ discard block |
||
| 2943 | 2943 | $this->mode = self::IN_ROW; |
| 2944 | 2944 | |
| 2945 | 2945 | /* A start tag whose tag name is one of: "th", "td" */ |
| 2946 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 2947 | - ($token['name'] === 'th' || $token['name'] === 'td')) { |
|
| 2946 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 2947 | + ($token['name'] === 'th' || $token['name'] === 'td')) { |
|
| 2948 | 2948 | /* Parse error. Act as if a start tag with the tag name "tr" had |
| 2949 | 2949 | been seen, then reprocess the current token. */ |
| 2950 | 2950 | $this->inTableBody(array( |
@@ -2956,12 +2956,12 @@ discard block |
||
| 2956 | 2956 | return $this->inRow($token); |
| 2957 | 2957 | |
| 2958 | 2958 | /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
| 2959 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 2959 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 2960 | 2960 | in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { |
| 2961 | 2961 | /* If the stack of open elements does not have an element in table |
| 2962 | 2962 | scope with the same tag name as the token, this is a parse error. |
| 2963 | 2963 | Ignore the token. */ |
| 2964 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 2964 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 2965 | 2965 | // Ignore |
| 2966 | 2966 | |
| 2967 | 2967 | /* Otherwise: */ |
@@ -2977,13 +2977,13 @@ discard block |
||
| 2977 | 2977 | |
| 2978 | 2978 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
| 2979 | 2979 | "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ |
| 2980 | - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2980 | + } elseif (($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 2981 | 2981 | array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || |
| 2982 | 2982 | ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { |
| 2983 | 2983 | /* If the stack of open elements does not have a tbody, thead, or |
| 2984 | 2984 | tfoot element in table scope, this is a parse error. Ignore the |
| 2985 | 2985 | token. (innerHTML case) */ |
| 2986 | - if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { |
|
| 2986 | + if (!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { |
|
| 2987 | 2987 | // Ignore. |
| 2988 | 2988 | |
| 2989 | 2989 | /* Otherwise: */ |
@@ -3004,7 +3004,7 @@ discard block |
||
| 3004 | 3004 | |
| 3005 | 3005 | /* An end tag whose tag name is one of: "body", "caption", "col", |
| 3006 | 3006 | "colgroup", "html", "td", "th", "tr" */ |
| 3007 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3007 | + } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3008 | 3008 | array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { |
| 3009 | 3009 | /* Parse error. Ignore the token. */ |
| 3010 | 3010 | |
@@ -3019,7 +3019,7 @@ discard block |
||
| 3019 | 3019 | $clear = array('tr', 'html'); |
| 3020 | 3020 | |
| 3021 | 3021 | /* A start tag whose tag name is one of: "th", "td" */ |
| 3022 | - if($token['type'] === HTML5::STARTTAG && |
|
| 3022 | + if ($token['type'] === HTML5::STARTTAG && |
|
| 3023 | 3023 | ($token['name'] === 'th' || $token['name'] === 'td')) { |
| 3024 | 3024 | /* Clear the stack back to a table row context. */ |
| 3025 | 3025 | $this->clearStackToTableContext($clear); |
@@ -3034,11 +3034,11 @@ discard block |
||
| 3034 | 3034 | $this->a_formatting[] = self::MARKER; |
| 3035 | 3035 | |
| 3036 | 3036 | /* An end tag whose tag name is "tr" */ |
| 3037 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { |
|
| 3037 | + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { |
|
| 3038 | 3038 | /* If the stack of open elements does not have an element in table |
| 3039 | 3039 | scope with the same tag name as the token, this is a parse error. |
| 3040 | 3040 | Ignore the token. (innerHTML case) */ |
| 3041 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3041 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 3042 | 3042 | // Ignore. |
| 3043 | 3043 | |
| 3044 | 3044 | /* Otherwise: */ |
@@ -3055,7 +3055,7 @@ discard block |
||
| 3055 | 3055 | |
| 3056 | 3056 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
| 3057 | 3057 | "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ |
| 3058 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3058 | + } elseif ($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3059 | 3059 | array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { |
| 3060 | 3060 | /* Act as if an end tag with the tag name "tr" had been seen, then, |
| 3061 | 3061 | if that token wasn't ignored, reprocess the current token. */ |
@@ -3067,12 +3067,12 @@ discard block |
||
| 3067 | 3067 | return $this->inCell($token); |
| 3068 | 3068 | |
| 3069 | 3069 | /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ |
| 3070 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3070 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 3071 | 3071 | in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { |
| 3072 | 3072 | /* If the stack of open elements does not have an element in table |
| 3073 | 3073 | scope with the same tag name as the token, this is a parse error. |
| 3074 | 3074 | Ignore the token. */ |
| 3075 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3075 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 3076 | 3076 | // Ignore. |
| 3077 | 3077 | |
| 3078 | 3078 | /* Otherwise: */ |
@@ -3089,7 +3089,7 @@ discard block |
||
| 3089 | 3089 | |
| 3090 | 3090 | /* An end tag whose tag name is one of: "body", "caption", "col", |
| 3091 | 3091 | "colgroup", "html", "td", "th" */ |
| 3092 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3092 | + } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3093 | 3093 | array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { |
| 3094 | 3094 | /* Parse error. Ignore the token. */ |
| 3095 | 3095 | |
@@ -3102,12 +3102,12 @@ discard block |
||
| 3102 | 3102 | |
| 3103 | 3103 | private function inCell($token) { |
| 3104 | 3104 | /* An end tag whose tag name is one of: "td", "th" */ |
| 3105 | - if($token['type'] === HTML5::ENDTAG && |
|
| 3105 | + if ($token['type'] === HTML5::ENDTAG && |
|
| 3106 | 3106 | ($token['name'] === 'td' || $token['name'] === 'th')) { |
| 3107 | 3107 | /* If the stack of open elements does not have an element in table |
| 3108 | 3108 | scope with the same tag name as that of the token, then this is a |
| 3109 | 3109 | parse error and the token must be ignored. */ |
| 3110 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3110 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 3111 | 3111 | // Ignore. |
| 3112 | 3112 | |
| 3113 | 3113 | /* Otherwise: */ |
@@ -3122,11 +3122,11 @@ discard block |
||
| 3122 | 3122 | |
| 3123 | 3123 | /* Pop elements from this stack until an element with the same |
| 3124 | 3124 | tag name as the token has been popped from the stack. */ |
| 3125 | - while(true) { |
|
| 3125 | + while (true) { |
|
| 3126 | 3126 | $node = end($this->stack)->nodeName; |
| 3127 | 3127 | array_pop($this->stack); |
| 3128 | 3128 | |
| 3129 | - if($node === $token['name']) { |
|
| 3129 | + if ($node === $token['name']) { |
|
| 3130 | 3130 | break; |
| 3131 | 3131 | } |
| 3132 | 3132 | } |
@@ -3142,13 +3142,13 @@ discard block |
||
| 3142 | 3142 | |
| 3143 | 3143 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
| 3144 | 3144 | "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 3145 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3145 | + } elseif ($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3146 | 3146 | array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
| 3147 | 3147 | 'thead', 'tr'))) { |
| 3148 | 3148 | /* If the stack of open elements does not have a td or th element |
| 3149 | 3149 | in table scope, then this is a parse error; ignore the token. |
| 3150 | 3150 | (innerHTML case) */ |
| 3151 | - if(!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3151 | + if (!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3152 | 3152 | // Ignore. |
| 3153 | 3153 | |
| 3154 | 3154 | /* Otherwise, close the cell (see below) and reprocess the current |
@@ -3160,13 +3160,13 @@ discard block |
||
| 3160 | 3160 | |
| 3161 | 3161 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", |
| 3162 | 3162 | "tbody", "td", "tfoot", "th", "thead", "tr" */ |
| 3163 | - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3163 | + } elseif ($token['type'] === HTML5::STARTTAG && in_array($token['name'], |
|
| 3164 | 3164 | array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', |
| 3165 | 3165 | 'thead', 'tr'))) { |
| 3166 | 3166 | /* If the stack of open elements does not have a td or th element |
| 3167 | 3167 | in table scope, then this is a parse error; ignore the token. |
| 3168 | 3168 | (innerHTML case) */ |
| 3169 | - if(!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3169 | + if (!$this->elementInScope(array('td', 'th'), true)) { |
|
| 3170 | 3170 | // Ignore. |
| 3171 | 3171 | |
| 3172 | 3172 | /* Otherwise, close the cell (see below) and reprocess the current |
@@ -3178,19 +3178,19 @@ discard block |
||
| 3178 | 3178 | |
| 3179 | 3179 | /* An end tag whose tag name is one of: "body", "caption", "col", |
| 3180 | 3180 | "colgroup", "html" */ |
| 3181 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3181 | + } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3182 | 3182 | array('body', 'caption', 'col', 'colgroup', 'html'))) { |
| 3183 | 3183 | /* Parse error. Ignore the token. */ |
| 3184 | 3184 | |
| 3185 | 3185 | /* An end tag whose tag name is one of: "table", "tbody", "tfoot", |
| 3186 | 3186 | "thead", "tr" */ |
| 3187 | - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3187 | + } elseif ($token['type'] === HTML5::ENDTAG && in_array($token['name'], |
|
| 3188 | 3188 | array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { |
| 3189 | 3189 | /* If the stack of open elements does not have an element in table |
| 3190 | 3190 | scope with the same tag name as that of the token (which can only |
| 3191 | 3191 | happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), |
| 3192 | 3192 | then this is a parse error and the token must be ignored. */ |
| 3193 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3193 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 3194 | 3194 | // Ignore. |
| 3195 | 3195 | |
| 3196 | 3196 | /* Otherwise, close the cell (see below) and reprocess the current |
@@ -3211,22 +3211,22 @@ discard block |
||
| 3211 | 3211 | /* Handle the token as follows: */ |
| 3212 | 3212 | |
| 3213 | 3213 | /* A character token */ |
| 3214 | - if($token['type'] === HTML5::CHARACTR) { |
|
| 3214 | + if ($token['type'] === HTML5::CHARACTR) { |
|
| 3215 | 3215 | /* Append the token's character to the current node. */ |
| 3216 | 3216 | $this->insertText($token['data']); |
| 3217 | 3217 | |
| 3218 | 3218 | /* A comment token */ |
| 3219 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3219 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 3220 | 3220 | /* Append a Comment node to the current node with the data |
| 3221 | 3221 | attribute set to the data given in the comment token. */ |
| 3222 | 3222 | $this->insertComment($token['data']); |
| 3223 | 3223 | |
| 3224 | 3224 | /* A start tag token whose tag name is "option" */ |
| 3225 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 3225 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 3226 | 3226 | $token['name'] === 'option') { |
| 3227 | 3227 | /* If the current node is an option element, act as if an end tag |
| 3228 | 3228 | with the tag name "option" had been seen. */ |
| 3229 | - if(end($this->stack)->nodeName === 'option') { |
|
| 3229 | + if (end($this->stack)->nodeName === 'option') { |
|
| 3230 | 3230 | $this->inSelect(array( |
| 3231 | 3231 | 'name' => 'option', |
| 3232 | 3232 | 'type' => HTML5::ENDTAG |
@@ -3237,11 +3237,11 @@ discard block |
||
| 3237 | 3237 | $this->insertElement($token); |
| 3238 | 3238 | |
| 3239 | 3239 | /* A start tag token whose tag name is "optgroup" */ |
| 3240 | - } elseif($token['type'] === HTML5::STARTTAG && |
|
| 3240 | + } elseif ($token['type'] === HTML5::STARTTAG && |
|
| 3241 | 3241 | $token['name'] === 'optgroup') { |
| 3242 | 3242 | /* If the current node is an option element, act as if an end tag |
| 3243 | 3243 | with the tag name "option" had been seen. */ |
| 3244 | - if(end($this->stack)->nodeName === 'option') { |
|
| 3244 | + if (end($this->stack)->nodeName === 'option') { |
|
| 3245 | 3245 | $this->inSelect(array( |
| 3246 | 3246 | 'name' => 'option', |
| 3247 | 3247 | 'type' => HTML5::ENDTAG |
@@ -3250,7 +3250,7 @@ discard block |
||
| 3250 | 3250 | |
| 3251 | 3251 | /* If the current node is an optgroup element, act as if an end tag |
| 3252 | 3252 | with the tag name "optgroup" had been seen. */ |
| 3253 | - if(end($this->stack)->nodeName === 'optgroup') { |
|
| 3253 | + if (end($this->stack)->nodeName === 'optgroup') { |
|
| 3254 | 3254 | $this->inSelect(array( |
| 3255 | 3255 | 'name' => 'optgroup', |
| 3256 | 3256 | 'type' => HTML5::ENDTAG |
@@ -3261,7 +3261,7 @@ discard block |
||
| 3261 | 3261 | $this->insertElement($token); |
| 3262 | 3262 | |
| 3263 | 3263 | /* An end tag token whose tag name is "optgroup" */ |
| 3264 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3264 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 3265 | 3265 | $token['name'] === 'optgroup') { |
| 3266 | 3266 | /* First, if the current node is an option element, and the node |
| 3267 | 3267 | immediately before it in the stack of open elements is an optgroup |
@@ -3269,7 +3269,7 @@ discard block |
||
| 3269 | 3269 | been seen. */ |
| 3270 | 3270 | $elements_in_stack = count($this->stack); |
| 3271 | 3271 | |
| 3272 | - if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && |
|
| 3272 | + if ($this->stack[$elements_in_stack - 1]->nodeName === 'option' && |
|
| 3273 | 3273 | $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { |
| 3274 | 3274 | $this->inSelect(array( |
| 3275 | 3275 | 'name' => 'option', |
@@ -3280,38 +3280,38 @@ discard block |
||
| 3280 | 3280 | /* If the current node is an optgroup element, then pop that node |
| 3281 | 3281 | from the stack of open elements. Otherwise, this is a parse error, |
| 3282 | 3282 | ignore the token. */ |
| 3283 | - if($this->stack[$elements_in_stack - 1] === 'optgroup') { |
|
| 3283 | + if ($this->stack[$elements_in_stack - 1] === 'optgroup') { |
|
| 3284 | 3284 | array_pop($this->stack); |
| 3285 | 3285 | } |
| 3286 | 3286 | |
| 3287 | 3287 | /* An end tag token whose tag name is "option" */ |
| 3288 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3288 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 3289 | 3289 | $token['name'] === 'option') { |
| 3290 | 3290 | /* If the current node is an option element, then pop that node |
| 3291 | 3291 | from the stack of open elements. Otherwise, this is a parse error, |
| 3292 | 3292 | ignore the token. */ |
| 3293 | - if(end($this->stack)->nodeName === 'option') { |
|
| 3293 | + if (end($this->stack)->nodeName === 'option') { |
|
| 3294 | 3294 | array_pop($this->stack); |
| 3295 | 3295 | } |
| 3296 | 3296 | |
| 3297 | 3297 | /* An end tag whose tag name is "select" */ |
| 3298 | - } elseif($token['type'] === HTML5::ENDTAG && |
|
| 3298 | + } elseif ($token['type'] === HTML5::ENDTAG && |
|
| 3299 | 3299 | $token['name'] === 'select') { |
| 3300 | 3300 | /* If the stack of open elements does not have an element in table |
| 3301 | 3301 | scope with the same tag name as the token, this is a parse error. |
| 3302 | 3302 | Ignore the token. (innerHTML case) */ |
| 3303 | - if(!$this->elementInScope($token['name'], true)) { |
|
| 3303 | + if (!$this->elementInScope($token['name'], true)) { |
|
| 3304 | 3304 | // w/e |
| 3305 | 3305 | |
| 3306 | 3306 | /* Otherwise: */ |
| 3307 | 3307 | } else { |
| 3308 | 3308 | /* Pop elements from the stack of open elements until a select |
| 3309 | 3309 | element has been popped from the stack. */ |
| 3310 | - while(true) { |
|
| 3310 | + while (true) { |
|
| 3311 | 3311 | $current = end($this->stack)->nodeName; |
| 3312 | 3312 | array_pop($this->stack); |
| 3313 | 3313 | |
| 3314 | - if($current === 'select') { |
|
| 3314 | + if ($current === 'select') { |
|
| 3315 | 3315 | break; |
| 3316 | 3316 | } |
| 3317 | 3317 | } |
@@ -3321,7 +3321,7 @@ discard block |
||
| 3321 | 3321 | } |
| 3322 | 3322 | |
| 3323 | 3323 | /* A start tag whose tag name is "select" */ |
| 3324 | - } elseif($token['name'] === 'select' && |
|
| 3324 | + } elseif ($token['name'] === 'select' && |
|
| 3325 | 3325 | $token['type'] === HTML5::STARTTAG) { |
| 3326 | 3326 | /* Parse error. Act as if the token had been an end tag with the |
| 3327 | 3327 | tag name "select" instead. */ |
@@ -3332,7 +3332,7 @@ discard block |
||
| 3332 | 3332 | |
| 3333 | 3333 | /* An end tag whose tag name is one of: "caption", "table", "tbody", |
| 3334 | 3334 | "tfoot", "thead", "tr", "td", "th" */ |
| 3335 | - } elseif(in_array($token['name'], array('caption', 'table', 'tbody', |
|
| 3335 | + } elseif (in_array($token['name'], array('caption', 'table', 'tbody', |
|
| 3336 | 3336 | 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { |
| 3337 | 3337 | /* Parse error. */ |
| 3338 | 3338 | // w/e |
@@ -3341,7 +3341,7 @@ discard block |
||
| 3341 | 3341 | the same tag name as that of the token, then act as if an end tag |
| 3342 | 3342 | with the tag name "select" had been seen, and reprocess the token. |
| 3343 | 3343 | Otherwise, ignore the token. */ |
| 3344 | - if($this->elementInScope($token['name'], true)) { |
|
| 3344 | + if ($this->elementInScope($token['name'], true)) { |
|
| 3345 | 3345 | $this->inSelect(array( |
| 3346 | 3346 | 'name' => 'select', |
| 3347 | 3347 | 'type' => HTML5::ENDTAG |
@@ -3362,14 +3362,14 @@ discard block |
||
| 3362 | 3362 | /* A character token that is one of U+0009 CHARACTER TABULATION, |
| 3363 | 3363 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3364 | 3364 | or U+0020 SPACE */ |
| 3365 | - if($token['type'] === HTML5::CHARACTR && |
|
| 3365 | + if ($token['type'] === HTML5::CHARACTR && |
|
| 3366 | 3366 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 3367 | 3367 | /* Process the token as it would be processed if the insertion mode |
| 3368 | 3368 | was "in body". */ |
| 3369 | 3369 | $this->inBody($token); |
| 3370 | 3370 | |
| 3371 | 3371 | /* A comment token */ |
| 3372 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3372 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 3373 | 3373 | /* Append a Comment node to the first element in the stack of open |
| 3374 | 3374 | elements (the html element), with the data attribute set to the |
| 3375 | 3375 | data given in the comment token. */ |
@@ -3377,7 +3377,7 @@ discard block |
||
| 3377 | 3377 | $this->stack[0]->appendChild($comment); |
| 3378 | 3378 | |
| 3379 | 3379 | /* An end tag with the tag name "html" */ |
| 3380 | - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { |
|
| 3380 | + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { |
|
| 3381 | 3381 | /* If the parser was originally created in order to handle the |
| 3382 | 3382 | setting of an element's innerHTML attribute, this is a parse error; |
| 3383 | 3383 | ignore the token. (The element will be an html element in this |
@@ -3401,28 +3401,28 @@ discard block |
||
| 3401 | 3401 | /* A character token that is one of U+0009 CHARACTER TABULATION, |
| 3402 | 3402 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3403 | 3403 | U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ |
| 3404 | - if($token['type'] === HTML5::CHARACTR && |
|
| 3404 | + if ($token['type'] === HTML5::CHARACTR && |
|
| 3405 | 3405 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 3406 | 3406 | /* Append the character to the current node. */ |
| 3407 | 3407 | $this->insertText($token['data']); |
| 3408 | 3408 | |
| 3409 | 3409 | /* A comment token */ |
| 3410 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3410 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 3411 | 3411 | /* Append a Comment node to the current node with the data |
| 3412 | 3412 | attribute set to the data given in the comment token. */ |
| 3413 | 3413 | $this->insertComment($token['data']); |
| 3414 | 3414 | |
| 3415 | 3415 | /* A start tag with the tag name "frameset" */ |
| 3416 | - } elseif($token['name'] === 'frameset' && |
|
| 3416 | + } elseif ($token['name'] === 'frameset' && |
|
| 3417 | 3417 | $token['type'] === HTML5::STARTTAG) { |
| 3418 | 3418 | $this->insertElement($token); |
| 3419 | 3419 | |
| 3420 | 3420 | /* An end tag with the tag name "frameset" */ |
| 3421 | - } elseif($token['name'] === 'frameset' && |
|
| 3421 | + } elseif ($token['name'] === 'frameset' && |
|
| 3422 | 3422 | $token['type'] === HTML5::ENDTAG) { |
| 3423 | 3423 | /* If the current node is the root html element, then this is a |
| 3424 | 3424 | parse error; ignore the token. (innerHTML case) */ |
| 3425 | - if(end($this->stack)->nodeName === 'html') { |
|
| 3425 | + if (end($this->stack)->nodeName === 'html') { |
|
| 3426 | 3426 | // Ignore |
| 3427 | 3427 | |
| 3428 | 3428 | } else { |
@@ -3438,7 +3438,7 @@ discard block |
||
| 3438 | 3438 | } |
| 3439 | 3439 | |
| 3440 | 3440 | /* A start tag with the tag name "frame" */ |
| 3441 | - } elseif($token['name'] === 'frame' && |
|
| 3441 | + } elseif ($token['name'] === 'frame' && |
|
| 3442 | 3442 | $token['type'] === HTML5::STARTTAG) { |
| 3443 | 3443 | /* Insert an HTML element for the token. */ |
| 3444 | 3444 | $this->insertElement($token); |
@@ -3447,7 +3447,7 @@ discard block |
||
| 3447 | 3447 | array_pop($this->stack); |
| 3448 | 3448 | |
| 3449 | 3449 | /* A start tag with the tag name "noframes" */ |
| 3450 | - } elseif($token['name'] === 'noframes' && |
|
| 3450 | + } elseif ($token['name'] === 'noframes' && |
|
| 3451 | 3451 | $token['type'] === HTML5::STARTTAG) { |
| 3452 | 3452 | /* Process the token as if the insertion mode had been "in body". */ |
| 3453 | 3453 | $this->inBody($token); |
@@ -3464,25 +3464,25 @@ discard block |
||
| 3464 | 3464 | /* A character token that is one of U+0009 CHARACTER TABULATION, |
| 3465 | 3465 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3466 | 3466 | U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ |
| 3467 | - if($token['type'] === HTML5::CHARACTR && |
|
| 3467 | + if ($token['type'] === HTML5::CHARACTR && |
|
| 3468 | 3468 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 3469 | 3469 | /* Append the character to the current node. */ |
| 3470 | 3470 | $this->insertText($token['data']); |
| 3471 | 3471 | |
| 3472 | 3472 | /* A comment token */ |
| 3473 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3473 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 3474 | 3474 | /* Append a Comment node to the current node with the data |
| 3475 | 3475 | attribute set to the data given in the comment token. */ |
| 3476 | 3476 | $this->insertComment($token['data']); |
| 3477 | 3477 | |
| 3478 | 3478 | /* An end tag with the tag name "html" */ |
| 3479 | - } elseif($token['name'] === 'html' && |
|
| 3479 | + } elseif ($token['name'] === 'html' && |
|
| 3480 | 3480 | $token['type'] === HTML5::ENDTAG) { |
| 3481 | 3481 | /* Switch to the trailing end phase. */ |
| 3482 | 3482 | $this->phase = self::END_PHASE; |
| 3483 | 3483 | |
| 3484 | 3484 | /* A start tag with the tag name "noframes" */ |
| 3485 | - } elseif($token['name'] === 'noframes' && |
|
| 3485 | + } elseif ($token['name'] === 'noframes' && |
|
| 3486 | 3486 | $token['type'] === HTML5::STARTTAG) { |
| 3487 | 3487 | /* Process the token as if the insertion mode had been "in body". */ |
| 3488 | 3488 | $this->inBody($token); |
@@ -3498,11 +3498,11 @@ discard block |
||
| 3498 | 3498 | stage, it must be processed as described in this section. */ |
| 3499 | 3499 | |
| 3500 | 3500 | /* A DOCTYPE token */ |
| 3501 | - if($token['type'] === HTML5::DOCTYPE) { |
|
| 3501 | + if ($token['type'] === HTML5::DOCTYPE) { |
|
| 3502 | 3502 | // Parse error. Ignore the token. |
| 3503 | 3503 | |
| 3504 | 3504 | /* A comment token */ |
| 3505 | - } elseif($token['type'] === HTML5::COMMENT) { |
|
| 3505 | + } elseif ($token['type'] === HTML5::COMMENT) { |
|
| 3506 | 3506 | /* Append a Comment node to the Document object with the data |
| 3507 | 3507 | attribute set to the data given in the comment token. */ |
| 3508 | 3508 | $comment = $this->dom->createComment($token['data']); |
@@ -3511,7 +3511,7 @@ discard block |
||
| 3511 | 3511 | /* A character token that is one of U+0009 CHARACTER TABULATION, |
| 3512 | 3512 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3513 | 3513 | or U+0020 SPACE */ |
| 3514 | - } elseif($token['type'] === HTML5::CHARACTR && |
|
| 3514 | + } elseif ($token['type'] === HTML5::CHARACTR && |
|
| 3515 | 3515 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { |
| 3516 | 3516 | /* Process the token as it would be processed in the main phase. */ |
| 3517 | 3517 | $this->mainPhase($token); |
@@ -3519,7 +3519,7 @@ discard block |
||
| 3519 | 3519 | /* A character token that is not one of U+0009 CHARACTER TABULATION, |
| 3520 | 3520 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), |
| 3521 | 3521 | or U+0020 SPACE. Or a start tag token. Or an end tag token. */ |
| 3522 | - } elseif(($token['type'] === HTML5::CHARACTR && |
|
| 3522 | + } elseif (($token['type'] === HTML5::CHARACTR && |
|
| 3523 | 3523 | preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || |
| 3524 | 3524 | $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { |
| 3525 | 3525 | /* Parse error. Switch back to the main phase and reprocess the |
@@ -3528,7 +3528,7 @@ discard block |
||
| 3528 | 3528 | return $this->mainPhase($token); |
| 3529 | 3529 | |
| 3530 | 3530 | /* An end-of-file token */ |
| 3531 | - } elseif($token['type'] === HTML5::EOF) { |
|
| 3531 | + } elseif ($token['type'] === HTML5::EOF) { |
|
| 3532 | 3532 | /* OMG DONE!! */ |
| 3533 | 3533 | } |
| 3534 | 3534 | } |
@@ -3547,8 +3547,8 @@ discard block |
||
| 3547 | 3547 | |
| 3548 | 3548 | $el = $this->dom->createElement($token['name']); |
| 3549 | 3549 | |
| 3550 | - foreach($token['attr'] as $attr) { |
|
| 3551 | - if(!$el->hasAttribute($attr['name'])) { |
|
| 3550 | + foreach ($token['attr'] as $attr) { |
|
| 3551 | + if (!$el->hasAttribute($attr['name'])) { |
|
| 3552 | 3552 | $el->setAttribute($attr['name'], $attr['value']); |
| 3553 | 3553 | } |
| 3554 | 3554 | } |
@@ -3570,25 +3570,25 @@ discard block |
||
| 3570 | 3570 | } |
| 3571 | 3571 | |
| 3572 | 3572 | private function appendToRealParent($node) { |
| 3573 | - if($this->foster_parent === null) { |
|
| 3573 | + if ($this->foster_parent === null) { |
|
| 3574 | 3574 | end($this->stack)->appendChild($node); |
| 3575 | 3575 | |
| 3576 | - } elseif($this->foster_parent !== null) { |
|
| 3576 | + } elseif ($this->foster_parent !== null) { |
|
| 3577 | 3577 | /* If the foster parent element is the parent element of the |
| 3578 | 3578 | last table element in the stack of open elements, then the new |
| 3579 | 3579 | node must be inserted immediately before the last table element |
| 3580 | 3580 | in the stack of open elements in the foster parent element; |
| 3581 | 3581 | otherwise, the new node must be appended to the foster parent |
| 3582 | 3582 | element. */ |
| 3583 | - for($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 3584 | - if($this->stack[$n]->nodeName === 'table' && |
|
| 3583 | + for ($n = count($this->stack) - 1; $n >= 0; $n--) { |
|
| 3584 | + if ($this->stack[$n]->nodeName === 'table' && |
|
| 3585 | 3585 | $this->stack[$n]->parentNode !== null) { |
| 3586 | 3586 | $table = $this->stack[$n]; |
| 3587 | 3587 | break; |
| 3588 | 3588 | } |
| 3589 | 3589 | } |
| 3590 | 3590 | |
| 3591 | - if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) |
|
| 3591 | + if (isset($table) && $this->foster_parent->isSameNode($table->parentNode)) |
|
| 3592 | 3592 | $this->foster_parent->insertBefore($node, $table); |
| 3593 | 3593 | else |
| 3594 | 3594 | $this->foster_parent->appendChild($node); |
@@ -3598,9 +3598,9 @@ discard block |
||
| 3598 | 3598 | } |
| 3599 | 3599 | |
| 3600 | 3600 | private function elementInScope($el, $table = false) { |
| 3601 | - if(is_array($el)) { |
|
| 3602 | - foreach($el as $element) { |
|
| 3603 | - if($this->elementInScope($element, $table)) { |
|
| 3601 | + if (is_array($el)) { |
|
| 3602 | + foreach ($el as $element) { |
|
| 3603 | + if ($this->elementInScope($element, $table)) { |
|
| 3604 | 3604 | return true; |
| 3605 | 3605 | } |
| 3606 | 3606 | } |
@@ -3610,28 +3610,28 @@ discard block |
||
| 3610 | 3610 | |
| 3611 | 3611 | $leng = count($this->stack); |
| 3612 | 3612 | |
| 3613 | - for($n = 0; $n < $leng; $n++) { |
|
| 3613 | + for ($n = 0; $n < $leng; $n++) { |
|
| 3614 | 3614 | /* 1. Initialise node to be the current node (the bottommost node of |
| 3615 | 3615 | the stack). */ |
| 3616 | 3616 | $node = $this->stack[$leng - 1 - $n]; |
| 3617 | 3617 | |
| 3618 | - if($node->tagName === $el) { |
|
| 3618 | + if ($node->tagName === $el) { |
|
| 3619 | 3619 | /* 2. If node is the target node, terminate in a match state. */ |
| 3620 | 3620 | return true; |
| 3621 | 3621 | |
| 3622 | - } elseif($node->tagName === 'table') { |
|
| 3622 | + } elseif ($node->tagName === 'table') { |
|
| 3623 | 3623 | /* 3. Otherwise, if node is a table element, terminate in a failure |
| 3624 | 3624 | state. */ |
| 3625 | 3625 | return false; |
| 3626 | 3626 | |
| 3627 | - } elseif($table === true && in_array($node->tagName, array('caption', 'td', |
|
| 3627 | + } elseif ($table === true && in_array($node->tagName, array('caption', 'td', |
|
| 3628 | 3628 | 'th', 'button', 'marquee', 'object'))) { |
| 3629 | 3629 | /* 4. Otherwise, if the algorithm is the "has an element in scope" |
| 3630 | 3630 | variant (rather than the "has an element in table scope" variant), |
| 3631 | 3631 | and node is one of the following, terminate in a failure state. */ |
| 3632 | 3632 | return false; |
| 3633 | 3633 | |
| 3634 | - } elseif($node === $node->ownerDocument->documentElement) { |
|
| 3634 | + } elseif ($node === $node->ownerDocument->documentElement) { |
|
| 3635 | 3635 | /* 5. Otherwise, if node is an html element (root element), terminate |
| 3636 | 3636 | in a failure state. (This can only happen if the node is the topmost |
| 3637 | 3637 | node of the stack of open elements, and prevents the next step from |
@@ -3651,7 +3651,7 @@ discard block |
||
| 3651 | 3651 | then there is nothing to reconstruct; stop this algorithm. */ |
| 3652 | 3652 | $formatting_elements = count($this->a_formatting); |
| 3653 | 3653 | |
| 3654 | - if($formatting_elements === 0) { |
|
| 3654 | + if ($formatting_elements === 0) { |
|
| 3655 | 3655 | return false; |
| 3656 | 3656 | } |
| 3657 | 3657 | |
@@ -3663,14 +3663,14 @@ discard block |
||
| 3663 | 3663 | formatting elements is a marker, or if it is an element that is in the |
| 3664 | 3664 | stack of open elements, then there is nothing to reconstruct; stop this |
| 3665 | 3665 | algorithm. */ |
| 3666 | - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3666 | + if ($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3667 | 3667 | return false; |
| 3668 | 3668 | } |
| 3669 | 3669 | |
| 3670 | - for($a = $formatting_elements - 1; $a >= 0; true) { |
|
| 3670 | + for ($a = $formatting_elements - 1; $a >= 0; true) { |
|
| 3671 | 3671 | /* 4. If there are no entries before entry in the list of active |
| 3672 | 3672 | formatting elements, then jump to step 8. */ |
| 3673 | - if($a === 0) { |
|
| 3673 | + if ($a === 0) { |
|
| 3674 | 3674 | $step_seven = false; |
| 3675 | 3675 | break; |
| 3676 | 3676 | } |
@@ -3682,15 +3682,15 @@ discard block |
||
| 3682 | 3682 | |
| 3683 | 3683 | /* 6. If entry is neither a marker nor an element that is also in |
| 3684 | 3684 | the stack of open elements, go to step 4. */ |
| 3685 | - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3685 | + if ($entry === self::MARKER || in_array($entry, $this->stack, true)) { |
|
| 3686 | 3686 | break; |
| 3687 | 3687 | } |
| 3688 | 3688 | } |
| 3689 | 3689 | |
| 3690 | - while(true) { |
|
| 3690 | + while (true) { |
|
| 3691 | 3691 | /* 7. Let entry be the element one later than entry in the list of |
| 3692 | 3692 | active formatting elements. */ |
| 3693 | - if(isset($step_seven) && $step_seven === true) { |
|
| 3693 | + if (isset($step_seven) && $step_seven === true) { |
|
| 3694 | 3694 | $a++; |
| 3695 | 3695 | $entry = $this->a_formatting[$a]; |
| 3696 | 3696 | } |
@@ -3709,7 +3709,7 @@ discard block |
||
| 3709 | 3709 | |
| 3710 | 3710 | /* 11. If the entry for clone in the list of active formatting |
| 3711 | 3711 | elements is not the last entry in the list, return to step 7. */ |
| 3712 | - if(end($this->a_formatting) !== $clone) { |
|
| 3712 | + if (end($this->a_formatting) !== $clone) { |
|
| 3713 | 3713 | $step_seven = true; |
| 3714 | 3714 | } else { |
| 3715 | 3715 | break; |
@@ -3722,7 +3722,7 @@ discard block |
||
| 3722 | 3722 | formatting elements up to the last marker, the UA must perform the |
| 3723 | 3723 | following steps: */ |
| 3724 | 3724 | |
| 3725 | - while(true) { |
|
| 3725 | + while (true) { |
|
| 3726 | 3726 | /* 1. Let entry be the last (most recently added) entry in the list |
| 3727 | 3727 | of active formatting elements. */ |
| 3728 | 3728 | $entry = end($this->a_formatting); |
@@ -3732,7 +3732,7 @@ discard block |
||
| 3732 | 3732 | |
| 3733 | 3733 | /* 3. If entry was a marker, then stop the algorithm at this point. |
| 3734 | 3734 | The list has been cleared up to the last marker. */ |
| 3735 | - if($entry === self::MARKER) { |
|
| 3735 | + if ($entry === self::MARKER) { |
|
| 3736 | 3736 | break; |
| 3737 | 3737 | } |
| 3738 | 3738 | } |
@@ -3747,20 +3747,20 @@ discard block |
||
| 3747 | 3747 | $node = end($this->stack); |
| 3748 | 3748 | $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); |
| 3749 | 3749 | |
| 3750 | - while(in_array(end($this->stack)->nodeName, $elements)) { |
|
| 3750 | + while (in_array(end($this->stack)->nodeName, $elements)) { |
|
| 3751 | 3751 | array_pop($this->stack); |
| 3752 | 3752 | } |
| 3753 | 3753 | } |
| 3754 | 3754 | |
| 3755 | 3755 | private function getElementCategory($node) { |
| 3756 | 3756 | $name = $node->tagName; |
| 3757 | - if(in_array($name, $this->special)) |
|
| 3757 | + if (in_array($name, $this->special)) |
|
| 3758 | 3758 | return self::SPECIAL; |
| 3759 | 3759 | |
| 3760 | - elseif(in_array($name, $this->scoping)) |
|
| 3760 | + elseif (in_array($name, $this->scoping)) |
|
| 3761 | 3761 | return self::SCOPING; |
| 3762 | 3762 | |
| 3763 | - elseif(in_array($name, $this->formatting)) |
|
| 3763 | + elseif (in_array($name, $this->formatting)) |
|
| 3764 | 3764 | return self::FORMATTING; |
| 3765 | 3765 | |
| 3766 | 3766 | else |
@@ -3773,10 +3773,10 @@ discard block |
||
| 3773 | 3773 | a table element or an html element, pop elements from the stack of open |
| 3774 | 3774 | elements. If this causes any elements to be popped from the stack, then |
| 3775 | 3775 | this is a parse error. */ |
| 3776 | - while(true) { |
|
| 3776 | + while (true) { |
|
| 3777 | 3777 | $node = end($this->stack)->nodeName; |
| 3778 | 3778 | |
| 3779 | - if(in_array($node, $elements)) { |
|
| 3779 | + if (in_array($node, $elements)) { |
|
| 3780 | 3780 | break; |
| 3781 | 3781 | } else { |
| 3782 | 3782 | array_pop($this->stack); |
@@ -3789,7 +3789,7 @@ discard block |
||
| 3789 | 3789 | $last = false; |
| 3790 | 3790 | $leng = count($this->stack); |
| 3791 | 3791 | |
| 3792 | - for($n = $leng - 1; $n >= 0; $n--) { |
|
| 3792 | + for ($n = $leng - 1; $n >= 0; $n--) { |
|
| 3793 | 3793 | /* 2. Let node be the last node in the stack of open elements. */ |
| 3794 | 3794 | $node = $this->stack[$n]; |
| 3795 | 3795 | |
@@ -3797,68 +3797,68 @@ discard block |
||
| 3797 | 3797 | set last to true. If the element whose innerHTML attribute is being |
| 3798 | 3798 | set is neither a td element nor a th element, then set node to the |
| 3799 | 3799 | element whose innerHTML attribute is being set. (innerHTML case) */ |
| 3800 | - if($this->stack[0]->isSameNode($node)) { |
|
| 3800 | + if ($this->stack[0]->isSameNode($node)) { |
|
| 3801 | 3801 | $last = true; |
| 3802 | 3802 | } |
| 3803 | 3803 | |
| 3804 | 3804 | /* 4. If node is a select element, then switch the insertion mode to |
| 3805 | 3805 | "in select" and abort these steps. (innerHTML case) */ |
| 3806 | - if($node->nodeName === 'select') { |
|
| 3806 | + if ($node->nodeName === 'select') { |
|
| 3807 | 3807 | $this->mode = self::IN_SELECT; |
| 3808 | 3808 | break; |
| 3809 | 3809 | |
| 3810 | 3810 | /* 5. If node is a td or th element, then switch the insertion mode |
| 3811 | 3811 | to "in cell" and abort these steps. */ |
| 3812 | - } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { |
|
| 3812 | + } elseif ($node->nodeName === 'td' || $node->nodeName === 'th') { |
|
| 3813 | 3813 | $this->mode = self::IN_CELL; |
| 3814 | 3814 | break; |
| 3815 | 3815 | |
| 3816 | 3816 | /* 6. If node is a tr element, then switch the insertion mode to |
| 3817 | 3817 | "in row" and abort these steps. */ |
| 3818 | - } elseif($node->nodeName === 'tr') { |
|
| 3818 | + } elseif ($node->nodeName === 'tr') { |
|
| 3819 | 3819 | $this->mode = self::IN_ROW; |
| 3820 | 3820 | break; |
| 3821 | 3821 | |
| 3822 | 3822 | /* 7. If node is a tbody, thead, or tfoot element, then switch the |
| 3823 | 3823 | insertion mode to "in table body" and abort these steps. */ |
| 3824 | - } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { |
|
| 3824 | + } elseif (in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { |
|
| 3825 | 3825 | $this->mode = self::IN_TBODY; |
| 3826 | 3826 | break; |
| 3827 | 3827 | |
| 3828 | 3828 | /* 8. If node is a caption element, then switch the insertion mode |
| 3829 | 3829 | to "in caption" and abort these steps. */ |
| 3830 | - } elseif($node->nodeName === 'caption') { |
|
| 3830 | + } elseif ($node->nodeName === 'caption') { |
|
| 3831 | 3831 | $this->mode = self::IN_CAPTION; |
| 3832 | 3832 | break; |
| 3833 | 3833 | |
| 3834 | 3834 | /* 9. If node is a colgroup element, then switch the insertion mode |
| 3835 | 3835 | to "in column group" and abort these steps. (innerHTML case) */ |
| 3836 | - } elseif($node->nodeName === 'colgroup') { |
|
| 3836 | + } elseif ($node->nodeName === 'colgroup') { |
|
| 3837 | 3837 | $this->mode = self::IN_CGROUP; |
| 3838 | 3838 | break; |
| 3839 | 3839 | |
| 3840 | 3840 | /* 10. If node is a table element, then switch the insertion mode |
| 3841 | 3841 | to "in table" and abort these steps. */ |
| 3842 | - } elseif($node->nodeName === 'table') { |
|
| 3842 | + } elseif ($node->nodeName === 'table') { |
|
| 3843 | 3843 | $this->mode = self::IN_TABLE; |
| 3844 | 3844 | break; |
| 3845 | 3845 | |
| 3846 | 3846 | /* 11. If node is a head element, then switch the insertion mode |
| 3847 | 3847 | to "in body" ("in body"! not "in head"!) and abort these steps. |
| 3848 | 3848 | (innerHTML case) */ |
| 3849 | - } elseif($node->nodeName === 'head') { |
|
| 3849 | + } elseif ($node->nodeName === 'head') { |
|
| 3850 | 3850 | $this->mode = self::IN_BODY; |
| 3851 | 3851 | break; |
| 3852 | 3852 | |
| 3853 | 3853 | /* 12. If node is a body element, then switch the insertion mode to |
| 3854 | 3854 | "in body" and abort these steps. */ |
| 3855 | - } elseif($node->nodeName === 'body') { |
|
| 3855 | + } elseif ($node->nodeName === 'body') { |
|
| 3856 | 3856 | $this->mode = self::IN_BODY; |
| 3857 | 3857 | break; |
| 3858 | 3858 | |
| 3859 | 3859 | /* 13. If node is a frameset element, then switch the insertion |
| 3860 | 3860 | mode to "in frameset" and abort these steps. (innerHTML case) */ |
| 3861 | - } elseif($node->nodeName === 'frameset') { |
|
| 3861 | + } elseif ($node->nodeName === 'frameset') { |
|
| 3862 | 3862 | $this->mode = self::IN_FRAME; |
| 3863 | 3863 | break; |
| 3864 | 3864 | |
@@ -3866,7 +3866,7 @@ discard block |
||
| 3866 | 3866 | pointer is null, switch the insertion mode to "before head", |
| 3867 | 3867 | otherwise, switch the insertion mode to "after head". In either |
| 3868 | 3868 | case, abort these steps. (innerHTML case) */ |
| 3869 | - } elseif($node->nodeName === 'html') { |
|
| 3869 | + } elseif ($node->nodeName === 'html') { |
|
| 3870 | 3870 | $this->mode = ($this->head_pointer === null) |
| 3871 | 3871 | ? self::BEFOR_HEAD |
| 3872 | 3872 | : self::AFTER_HEAD; |
@@ -3875,7 +3875,7 @@ discard block |
||
| 3875 | 3875 | |
| 3876 | 3876 | /* 15. If last is true, then set the insertion mode to "in body" |
| 3877 | 3877 | and abort these steps. (innerHTML case) */ |
| 3878 | - } elseif($last) { |
|
| 3878 | + } elseif ($last) { |
|
| 3879 | 3879 | $this->mode = self::IN_BODY; |
| 3880 | 3880 | break; |
| 3881 | 3881 | } |
@@ -3885,8 +3885,8 @@ discard block |
||
| 3885 | 3885 | private function closeCell() { |
| 3886 | 3886 | /* If the stack of open elements has a td or th element in table scope, |
| 3887 | 3887 | then act as if an end tag token with that tag name had been seen. */ |
| 3888 | - foreach(array('td', 'th') as $cell) { |
|
| 3889 | - if($this->elementInScope($cell, true)) { |
|
| 3888 | + foreach (array('td', 'th') as $cell) { |
|
| 3889 | + if ($this->elementInScope($cell, true)) { |
|
| 3890 | 3890 | $this->inCell(array( |
| 3891 | 3891 | 'name' => $cell, |
| 3892 | 3892 | 'type' => HTML5::ENDTAG |
@@ -60,7 +60,7 @@ discard block |
||
| 60 | 60 | |
| 61 | 61 | /** |
| 62 | 62 | * Prints a complete element with content inside |
| 63 | - * @param $tag Tag name |
|
| 63 | + * @param string $tag Tag name |
|
| 64 | 64 | * @param $contents Element contents |
| 65 | 65 | * @param $attr Tag attributes |
| 66 | 66 | * @param $escape Bool whether or not to escape contents |
@@ -71,6 +71,9 @@ discard block |
||
| 71 | 71 | $this->end($tag); |
| 72 | 72 | } |
| 73 | 73 | |
| 74 | + /** |
|
| 75 | + * @param string $tag |
|
| 76 | + */ |
|
| 74 | 77 | protected function elementEmpty($tag, $attr = array()) { |
| 75 | 78 | return $this->generator->generateFromToken( |
| 76 | 79 | new HTMLPurifier_Token_Empty($tag, $attr) |
@@ -6,170 +6,170 @@ |
||
| 6 | 6 | class HTMLPurifier_Printer |
| 7 | 7 | { |
| 8 | 8 | |
| 9 | - /** |
|
| 10 | - * Instance of HTMLPurifier_Generator for HTML generation convenience funcs |
|
| 11 | - */ |
|
| 12 | - protected $generator; |
|
| 13 | - |
|
| 14 | - /** |
|
| 15 | - * Instance of HTMLPurifier_Config, for easy access |
|
| 16 | - */ |
|
| 17 | - protected $config; |
|
| 18 | - |
|
| 19 | - /** |
|
| 20 | - * Initialize $generator. |
|
| 21 | - */ |
|
| 22 | - public function __construct() { |
|
| 23 | - } |
|
| 24 | - |
|
| 25 | - /** |
|
| 26 | - * Give generator necessary configuration if possible |
|
| 27 | - */ |
|
| 28 | - public function prepareGenerator($config) { |
|
| 29 | - $all = $config->getAll(); |
|
| 30 | - $context = new HTMLPurifier_Context(); |
|
| 31 | - $this->generator = new HTMLPurifier_Generator($config, $context); |
|
| 32 | - } |
|
| 33 | - |
|
| 34 | - /** |
|
| 35 | - * Main function that renders object or aspect of that object |
|
| 36 | - * @note Parameters vary depending on printer |
|
| 37 | - */ |
|
| 38 | - // function render() {} |
|
| 39 | - |
|
| 40 | - /** |
|
| 41 | - * Returns a start tag |
|
| 42 | - * @param $tag Tag name |
|
| 43 | - * @param $attr Attribute array |
|
| 44 | - */ |
|
| 45 | - protected function start($tag, $attr = array()) { |
|
| 46 | - return $this->generator->generateFromToken( |
|
| 47 | - new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) |
|
| 48 | - ); |
|
| 49 | - } |
|
| 50 | - |
|
| 51 | - /** |
|
| 52 | - * Returns an end teg |
|
| 53 | - * @param $tag Tag name |
|
| 54 | - */ |
|
| 55 | - protected function end($tag) { |
|
| 56 | - return $this->generator->generateFromToken( |
|
| 57 | - new HTMLPurifier_Token_End($tag) |
|
| 58 | - ); |
|
| 59 | - } |
|
| 60 | - |
|
| 61 | - /** |
|
| 62 | - * Prints a complete element with content inside |
|
| 63 | - * @param $tag Tag name |
|
| 64 | - * @param $contents Element contents |
|
| 65 | - * @param $attr Tag attributes |
|
| 66 | - * @param $escape Bool whether or not to escape contents |
|
| 67 | - */ |
|
| 68 | - protected function element($tag, $contents, $attr = array(), $escape = true) { |
|
| 69 | - return $this->start($tag, $attr) . |
|
| 70 | - ($escape ? $this->escape($contents) : $contents) . |
|
| 71 | - $this->end($tag); |
|
| 72 | - } |
|
| 73 | - |
|
| 74 | - protected function elementEmpty($tag, $attr = array()) { |
|
| 75 | - return $this->generator->generateFromToken( |
|
| 76 | - new HTMLPurifier_Token_Empty($tag, $attr) |
|
| 77 | - ); |
|
| 78 | - } |
|
| 79 | - |
|
| 80 | - protected function text($text) { |
|
| 81 | - return $this->generator->generateFromToken( |
|
| 82 | - new HTMLPurifier_Token_Text($text) |
|
| 83 | - ); |
|
| 84 | - } |
|
| 85 | - |
|
| 86 | - /** |
|
| 87 | - * Prints a simple key/value row in a table. |
|
| 88 | - * @param $name Key |
|
| 89 | - * @param $value Value |
|
| 90 | - */ |
|
| 91 | - protected function row($name, $value) { |
|
| 92 | - if (is_bool($value)) $value = $value ? 'On' : 'Off'; |
|
| 93 | - return |
|
| 94 | - $this->start('tr') . "\n" . |
|
| 95 | - $this->element('th', $name) . "\n" . |
|
| 96 | - $this->element('td', $value) . "\n" . |
|
| 97 | - $this->end('tr') |
|
| 98 | - ; |
|
| 99 | - } |
|
| 100 | - |
|
| 101 | - /** |
|
| 102 | - * Escapes a string for HTML output. |
|
| 103 | - * @param $string String to escape |
|
| 104 | - */ |
|
| 105 | - protected function escape($string) { |
|
| 106 | - $string = HTMLPurifier_Encoder::cleanUTF8($string); |
|
| 107 | - $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8', false); |
|
| 108 | - return $string; |
|
| 109 | - } |
|
| 110 | - |
|
| 111 | - /** |
|
| 112 | - * Takes a list of strings and turns them into a single list |
|
| 113 | - * @param $array List of strings |
|
| 114 | - * @param $polite Bool whether or not to add an end before the last |
|
| 115 | - */ |
|
| 116 | - protected function listify($array, $polite = false) { |
|
| 117 | - if (empty($array)) return 'None'; |
|
| 118 | - $ret = ''; |
|
| 119 | - $i = count($array); |
|
| 120 | - foreach ($array as $value) { |
|
| 121 | - $i--; |
|
| 122 | - $ret .= $value; |
|
| 123 | - if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; |
|
| 124 | - if ($polite && $i == 1) $ret .= 'and '; |
|
| 125 | - } |
|
| 126 | - return $ret; |
|
| 127 | - } |
|
| 128 | - |
|
| 129 | - /** |
|
| 130 | - * Retrieves the class of an object without prefixes, as well as metadata |
|
| 131 | - * @param $obj Object to determine class of |
|
| 132 | - * @param $prefix Further prefix to remove |
|
| 133 | - */ |
|
| 134 | - protected function getClass($obj, $sec_prefix = '') { |
|
| 135 | - static $five = null; |
|
| 136 | - if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); |
|
| 137 | - $prefix = 'HTMLPurifier_' . $sec_prefix; |
|
| 138 | - if (!$five) $prefix = strtolower($prefix); |
|
| 139 | - $class = str_replace($prefix, '', get_class($obj)); |
|
| 140 | - $lclass = strtolower($class); |
|
| 141 | - $class .= '('; |
|
| 142 | - switch ($lclass) { |
|
| 143 | - case 'enum': |
|
| 144 | - $values = array(); |
|
| 145 | - foreach ($obj->valid_values as $value => $bool) { |
|
| 146 | - $values[] = $value; |
|
| 147 | - } |
|
| 148 | - $class .= implode(', ', $values); |
|
| 149 | - break; |
|
| 150 | - case 'css_composite': |
|
| 151 | - $values = array(); |
|
| 152 | - foreach ($obj->defs as $def) { |
|
| 153 | - $values[] = $this->getClass($def, $sec_prefix); |
|
| 154 | - } |
|
| 155 | - $class .= implode(', ', $values); |
|
| 156 | - break; |
|
| 157 | - case 'css_multiple': |
|
| 158 | - $class .= $this->getClass($obj->single, $sec_prefix) . ', '; |
|
| 159 | - $class .= $obj->max; |
|
| 160 | - break; |
|
| 161 | - case 'css_denyelementdecorator': |
|
| 162 | - $class .= $this->getClass($obj->def, $sec_prefix) . ', '; |
|
| 163 | - $class .= $obj->element; |
|
| 164 | - break; |
|
| 165 | - case 'css_importantdecorator': |
|
| 166 | - $class .= $this->getClass($obj->def, $sec_prefix); |
|
| 167 | - if ($obj->allow) $class .= ', !important'; |
|
| 168 | - break; |
|
| 169 | - } |
|
| 170 | - $class .= ')'; |
|
| 171 | - return $class; |
|
| 172 | - } |
|
| 9 | + /** |
|
| 10 | + * Instance of HTMLPurifier_Generator for HTML generation convenience funcs |
|
| 11 | + */ |
|
| 12 | + protected $generator; |
|
| 13 | + |
|
| 14 | + /** |
|
| 15 | + * Instance of HTMLPurifier_Config, for easy access |
|
| 16 | + */ |
|
| 17 | + protected $config; |
|
| 18 | + |
|
| 19 | + /** |
|
| 20 | + * Initialize $generator. |
|
| 21 | + */ |
|
| 22 | + public function __construct() { |
|
| 23 | + } |
|
| 24 | + |
|
| 25 | + /** |
|
| 26 | + * Give generator necessary configuration if possible |
|
| 27 | + */ |
|
| 28 | + public function prepareGenerator($config) { |
|
| 29 | + $all = $config->getAll(); |
|
| 30 | + $context = new HTMLPurifier_Context(); |
|
| 31 | + $this->generator = new HTMLPurifier_Generator($config, $context); |
|
| 32 | + } |
|
| 33 | + |
|
| 34 | + /** |
|
| 35 | + * Main function that renders object or aspect of that object |
|
| 36 | + * @note Parameters vary depending on printer |
|
| 37 | + */ |
|
| 38 | + // function render() {} |
|
| 39 | + |
|
| 40 | + /** |
|
| 41 | + * Returns a start tag |
|
| 42 | + * @param $tag Tag name |
|
| 43 | + * @param $attr Attribute array |
|
| 44 | + */ |
|
| 45 | + protected function start($tag, $attr = array()) { |
|
| 46 | + return $this->generator->generateFromToken( |
|
| 47 | + new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) |
|
| 48 | + ); |
|
| 49 | + } |
|
| 50 | + |
|
| 51 | + /** |
|
| | 52 | + * Returns an end tag |
|
| 53 | + * @param $tag Tag name |
|
| 54 | + */ |
|
| 55 | + protected function end($tag) { |
|
| 56 | + return $this->generator->generateFromToken( |
|
| 57 | + new HTMLPurifier_Token_End($tag) |
|
| 58 | + ); |
|
| 59 | + } |
|
| 60 | + |
|
| 61 | + /** |
|
| 62 | + * Prints a complete element with content inside |
|
| 63 | + * @param $tag Tag name |
|
| 64 | + * @param $contents Element contents |
|
| 65 | + * @param $attr Tag attributes |
|
| 66 | + * @param $escape Bool whether or not to escape contents |
|
| 67 | + */ |
|
| 68 | + protected function element($tag, $contents, $attr = array(), $escape = true) { |
|
| 69 | + return $this->start($tag, $attr) . |
|
| 70 | + ($escape ? $this->escape($contents) : $contents) . |
|
| 71 | + $this->end($tag); |
|
| 72 | + } |
|
| 73 | + |
|
| 74 | + protected function elementEmpty($tag, $attr = array()) { |
|
| 75 | + return $this->generator->generateFromToken( |
|
| 76 | + new HTMLPurifier_Token_Empty($tag, $attr) |
|
| 77 | + ); |
|
| 78 | + } |
|
| 79 | + |
|
| 80 | + protected function text($text) { |
|
| 81 | + return $this->generator->generateFromToken( |
|
| 82 | + new HTMLPurifier_Token_Text($text) |
|
| 83 | + ); |
|
| 84 | + } |
|
| 85 | + |
|
| 86 | + /** |
|
| 87 | + * Prints a simple key/value row in a table. |
|
| 88 | + * @param $name Key |
|
| 89 | + * @param $value Value |
|
| 90 | + */ |
|
| 91 | + protected function row($name, $value) { |
|
| 92 | + if (is_bool($value)) $value = $value ? 'On' : 'Off'; |
|
| 93 | + return |
|
| 94 | + $this->start('tr') . "\n" . |
|
| 95 | + $this->element('th', $name) . "\n" . |
|
| 96 | + $this->element('td', $value) . "\n" . |
|
| 97 | + $this->end('tr') |
|
| 98 | + ; |
|
| 99 | + } |
|
| 100 | + |
|
| 101 | + /** |
|
| 102 | + * Escapes a string for HTML output. |
|
| 103 | + * @param $string String to escape |
|
| 104 | + */ |
|
| 105 | + protected function escape($string) { |
|
| 106 | + $string = HTMLPurifier_Encoder::cleanUTF8($string); |
|
| 107 | + $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8', false); |
|
| 108 | + return $string; |
|
| 109 | + } |
|
| 110 | + |
|
| 111 | + /** |
|
| 112 | + * Takes a list of strings and turns them into a single list |
|
| 113 | + * @param $array List of strings |
|
| | 114 | + * @param $polite Bool whether or not to add an 'and' before the last |
|
| 115 | + */ |
|
| 116 | + protected function listify($array, $polite = false) { |
|
| 117 | + if (empty($array)) return 'None'; |
|
| 118 | + $ret = ''; |
|
| 119 | + $i = count($array); |
|
| 120 | + foreach ($array as $value) { |
|
| 121 | + $i--; |
|
| 122 | + $ret .= $value; |
|
| 123 | + if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; |
|
| 124 | + if ($polite && $i == 1) $ret .= 'and '; |
|
| 125 | + } |
|
| 126 | + return $ret; |
|
| 127 | + } |
|
| 128 | + |
|
| 129 | + /** |
|
| 130 | + * Retrieves the class of an object without prefixes, as well as metadata |
|
| 131 | + * @param $obj Object to determine class of |
|
| | 132 | + * @param $sec_prefix Further prefix to remove |
|
| 133 | + */ |
|
| 134 | + protected function getClass($obj, $sec_prefix = '') { |
|
| 135 | + static $five = null; |
|
| 136 | + if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); |
|
| 137 | + $prefix = 'HTMLPurifier_' . $sec_prefix; |
|
| 138 | + if (!$five) $prefix = strtolower($prefix); |
|
| 139 | + $class = str_replace($prefix, '', get_class($obj)); |
|
| 140 | + $lclass = strtolower($class); |
|
| 141 | + $class .= '('; |
|
| 142 | + switch ($lclass) { |
|
| 143 | + case 'enum': |
|
| 144 | + $values = array(); |
|
| 145 | + foreach ($obj->valid_values as $value => $bool) { |
|
| 146 | + $values[] = $value; |
|
| 147 | + } |
|
| 148 | + $class .= implode(', ', $values); |
|
| 149 | + break; |
|
| 150 | + case 'css_composite': |
|
| 151 | + $values = array(); |
|
| 152 | + foreach ($obj->defs as $def) { |
|
| 153 | + $values[] = $this->getClass($def, $sec_prefix); |
|
| 154 | + } |
|
| 155 | + $class .= implode(', ', $values); |
|
| 156 | + break; |
|
| 157 | + case 'css_multiple': |
|
| 158 | + $class .= $this->getClass($obj->single, $sec_prefix) . ', '; |
|
| 159 | + $class .= $obj->max; |
|
| 160 | + break; |
|
| 161 | + case 'css_denyelementdecorator': |
|
| 162 | + $class .= $this->getClass($obj->def, $sec_prefix) . ', '; |
|
| 163 | + $class .= $obj->element; |
|
| 164 | + break; |
|
| 165 | + case 'css_importantdecorator': |
|
| 166 | + $class .= $this->getClass($obj->def, $sec_prefix); |
|
| 167 | + if ($obj->allow) $class .= ', !important'; |
|
| 168 | + break; |
|
| 169 | + } |
|
| 170 | + $class .= ')'; |
|
| 171 | + return $class; |
|
| 172 | + } |
|
| 173 | 173 | |
| 174 | 174 | } |
| 175 | 175 | |
@@ -89,7 +89,9 @@ discard block |
||
| 89 | 89 | * @param $value Value |
| 90 | 90 | */ |
| 91 | 91 | protected function row($name, $value) { |
| 92 | - if (is_bool($value)) $value = $value ? 'On' : 'Off'; |
|
| 92 | + if (is_bool($value)) { |
|
| 93 | + $value = $value ? 'On' : 'Off'; |
|
| 94 | + } |
|
| 93 | 95 | return |
| 94 | 96 | $this->start('tr') . "\n" . |
| 95 | 97 | $this->element('th', $name) . "\n" . |
@@ -114,14 +116,20 @@ discard block |
||
| 114 | 116 | * @param $polite Bool whether or not to add an 'and' before the last |
| 115 | 117 | */ |
| 116 | 118 | protected function listify($array, $polite = false) { |
| 117 | - if (empty($array)) return 'None'; |
|
| 119 | + if (empty($array)) { |
|
| 120 | + return 'None'; |
|
| 121 | + } |
|
| 118 | 122 | $ret = ''; |
| 119 | 123 | $i = count($array); |
| 120 | 124 | foreach ($array as $value) { |
| 121 | 125 | $i--; |
| 122 | 126 | $ret .= $value; |
| 123 | - if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; |
|
| 124 | - if ($polite && $i == 1) $ret .= 'and '; |
|
| 127 | + if ($i > 0 && !($polite && $i == 1)) { |
|
| 128 | + $ret .= ', '; |
|
| 129 | + } |
|
| 130 | + if ($polite && $i == 1) { |
|
| 131 | + $ret .= 'and '; |
|
| 132 | + } |
|
| 125 | 133 | } |
| 126 | 134 | return $ret; |
| 127 | 135 | } |
@@ -133,9 +141,13 @@ discard block |
||
| 133 | 141 | */ |
| 134 | 142 | protected function getClass($obj, $sec_prefix = '') { |
| 135 | 143 | static $five = null; |
| 136 | - if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); |
|
| 144 | + if ($five === null) { |
|
| 145 | + $five = version_compare(PHP_VERSION, '5', '>='); |
|
| 146 | + } |
|
| 137 | 147 | $prefix = 'HTMLPurifier_' . $sec_prefix; |
| 138 | - if (!$five) $prefix = strtolower($prefix); |
|
| 148 | + if (!$five) { |
|
| 149 | + $prefix = strtolower($prefix); |
|
| 150 | + } |
|
| 139 | 151 | $class = str_replace($prefix, '', get_class($obj)); |
| 140 | 152 | $lclass = strtolower($class); |
| 141 | 153 | $class .= '('; |
@@ -164,7 +176,9 @@ discard block |
||
| 164 | 176 | break; |
| 165 | 177 | case 'css_importantdecorator': |
| 166 | 178 | $class .= $this->getClass($obj->def, $sec_prefix); |
| 167 | - if ($obj->allow) $class .= ', !important'; |
|
| 179 | + if ($obj->allow) { |
|
| 180 | + $class .= ', !important'; |
|
| 181 | + } |
|
| 168 | 182 | break; |
| 169 | 183 | } |
| 170 | 184 | $class .= ')'; |
@@ -66,8 +66,8 @@ discard block |
||
| 66 | 66 | * @param $escape Bool whether or not to escape contents |
| 67 | 67 | */ |
| 68 | 68 | protected function element($tag, $contents, $attr = array(), $escape = true) { |
| 69 | - return $this->start($tag, $attr) . |
|
| 70 | - ($escape ? $this->escape($contents) : $contents) . |
|
| 69 | + return $this->start($tag, $attr). |
|
| 70 | + ($escape ? $this->escape($contents) : $contents). |
|
| 71 | 71 | $this->end($tag); |
| 72 | 72 | } |
| 73 | 73 | |
@@ -91,9 +91,9 @@ discard block |
||
| 91 | 91 | protected function row($name, $value) { |
| 92 | 92 | if (is_bool($value)) $value = $value ? 'On' : 'Off'; |
| 93 | 93 | return |
| 94 | - $this->start('tr') . "\n" . |
|
| 95 | - $this->element('th', $name) . "\n" . |
|
| 96 | - $this->element('td', $value) . "\n" . |
|
| 94 | + $this->start('tr')."\n". |
|
| 95 | + $this->element('th', $name)."\n". |
|
| 96 | + $this->element('td', $value)."\n". |
|
| 97 | 97 | $this->end('tr') |
| 98 | 98 | ; |
| 99 | 99 | } |
@@ -134,7 +134,7 @@ discard block |
||
| 134 | 134 | protected function getClass($obj, $sec_prefix = '') { |
| 135 | 135 | static $five = null; |
| 136 | 136 | if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); |
| 137 | - $prefix = 'HTMLPurifier_' . $sec_prefix; |
|
| 137 | + $prefix = 'HTMLPurifier_'.$sec_prefix; |
|
| 138 | 138 | if (!$five) $prefix = strtolower($prefix); |
| 139 | 139 | $class = str_replace($prefix, '', get_class($obj)); |
| 140 | 140 | $lclass = strtolower($class); |
@@ -155,11 +155,11 @@ discard block |
||
| 155 | 155 | $class .= implode(', ', $values); |
| 156 | 156 | break; |
| 157 | 157 | case 'css_multiple': |
| 158 | - $class .= $this->getClass($obj->single, $sec_prefix) . ', '; |
|
| 158 | + $class .= $this->getClass($obj->single, $sec_prefix).', '; |
|
| 159 | 159 | $class .= $obj->max; |
| 160 | 160 | break; |
| 161 | 161 | case 'css_denyelementdecorator': |
| 162 | - $class .= $this->getClass($obj->def, $sec_prefix) . ', '; |
|
| 162 | + $class .= $this->getClass($obj->def, $sec_prefix).', '; |
|
| 163 | 163 | $class .= $obj->element; |
| 164 | 164 | break; |
| 165 | 165 | case 'css_importantdecorator': |