| Total Complexity | 61 |
| Total Lines | 461 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like HTMLPurifier_HTMLModuleManager often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HTMLPurifier_HTMLModuleManager, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 3 | class HTMLPurifier_HTMLModuleManager |
||
| 4 | { |
||
| 5 | |||
| 6 | /** |
||
| 7 | * @type HTMLPurifier_DoctypeRegistry |
||
| 8 | */ |
||
| 9 | public $doctypes; |
||
| 10 | |||
| 11 | /** |
||
| 12 | * Instance of current doctype. |
||
| 13 | * @type HTMLPurifier_Doctype |
||
| 14 | */ |
||
| 15 | public $doctype; |
||
| 16 | |||
| 17 | /** |
||
| 18 | * @type HTMLPurifier_AttrTypes |
||
| 19 | */ |
||
| 20 | public $attrTypes; |
||
| 21 | |||
| 22 | /** |
||
| 23 | * Active instances of modules for the specified doctype are |
||
| 24 | * indexed, by name, in this array. |
||
| 25 | * @type HTMLPurifier_HTMLModule[] |
||
| 26 | */ |
||
| 27 | public $modules = array(); |
||
| 28 | |||
| 29 | /** |
||
| 30 | * Array of recognized HTMLPurifier_HTMLModule instances, |
||
| 31 | * indexed by module's class name. This array is usually lazy loaded, but a |
||
| 32 | * user can overload a module by pre-emptively registering it. |
||
| 33 | * @type HTMLPurifier_HTMLModule[] |
||
| 34 | */ |
||
| 35 | public $registeredModules = array(); |
||
| 36 | |||
| 37 | /** |
||
| 38 | * List of extra modules that were added by the user |
||
| 39 | * using addModule(). These get unconditionally merged into the current doctype, whatever |
||
| 40 | * it may be. |
||
| 41 | * @type HTMLPurifier_HTMLModule[] |
||
| 42 | */ |
||
| 43 | public $userModules = array(); |
||
| 44 | |||
| 45 | /** |
||
| 46 | * Associative array of element name to list of modules that have |
||
| 47 | * definitions for the element; this array is dynamically filled. |
||
| 48 | * @type array |
||
| 49 | */ |
||
| 50 | public $elementLookup = array(); |
||
| 51 | |||
| 52 | /** |
||
| 53 | * List of prefixes we should use for registering small names. |
||
| 54 | * @type array |
||
| 55 | */ |
||
| 56 | public $prefixes = array('HTMLPurifier_HTMLModule_'); |
||
| 57 | |||
| 58 | /** |
||
| 59 | * @type HTMLPurifier_ContentSets |
||
| 60 | */ |
||
| 61 | public $contentSets; |
||
| 62 | |||
| 63 | /** |
||
| 64 | * @type HTMLPurifier_AttrCollections |
||
| 65 | */ |
||
| 66 | public $attrCollections; |
||
| 67 | |||
| 68 | /** |
||
| 69 | * If set to true, unsafe elements and attributes will be allowed. |
||
| 70 | * @type bool |
||
| 71 | */ |
||
| 72 | public $trusted = false; |
||
| 73 | |||
/**
 * Creates the editable attribute-type and doctype registries and
 * registers the built-in HTML 4.01 / XHTML 1.x doctypes.
 */
public function __construct()
{
    // Internal objects that stay publicly editable after construction.
    $this->attrTypes = new HTMLPurifier_AttrTypes();
    $this->doctypes = new HTMLPurifier_DoctypeRegistry();

    // Building blocks for the per-doctype module lists.
    $baseModules = array(
        'CommonAttributes', 'Text', 'Hypertext', 'List',
        'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
        'StyleAttribute',
        // Unsafe:
        'Scripting', 'Object', 'Forms',
        // Sorta legacy, but present in strict:
        'Name',
    );
    $transitionalModules = array('Legacy', 'Target', 'Iframe');
    $xmlModules = array('XMLCommonAttributes');
    $nonXmlModules = array('NonXMLCommonAttributes');

    // One row per built-in doctype, in registration order:
    // name, flag (true for the XHTML doctypes), module list,
    // Tidy module list, public identifier, DTD URL.
    // NOTE(review): 'Tidy_Strict' is listed twice for 'XHTML 1.0 Strict'
    // and 'XHTML 1.1'; kept as-is here -- confirm whether the
    // duplicate is intentional.
    $builtinDoctypes = array(
        array(
            'HTML 4.01 Transitional',
            false,
            array_merge($baseModules, $transitionalModules, $nonXmlModules),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd',
        ),
        array(
            'HTML 4.01 Strict',
            false,
            array_merge($baseModules, $nonXmlModules),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd',
        ),
        array(
            'XHTML 1.0 Transitional',
            true,
            array_merge($baseModules, $transitionalModules, $xmlModules, $nonXmlModules),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        ),
        array(
            'XHTML 1.0 Strict',
            true,
            array_merge($baseModules, $xmlModules, $nonXmlModules),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd',
        ),
        array(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($baseModules, $xmlModules, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd',
        ),
    );

    foreach ($builtinDoctypes as $spec) {
        list($name, $xml, $modules, $tidyModules, $publicId, $systemId) = $spec;
        // The fifth argument is always an empty array for the built-ins
        // (presumably the alias list -- confirm against
        // HTMLPurifier_DoctypeRegistry::register()).
        $this->doctypes->register(
            $name,
            $xml,
            $modules,
            $tidyModules,
            array(),
            $publicId,
            $systemId
        );
    }
}
||
| 148 | |||
/**
 * Adds a module to the list of recognized modules; this is also how a
 * pre-existing module gets overloaded.
 *
 * @param $module Mixed: string module name (tried with each entry of
 *              $this->prefixes prepended, then as a literal class name),
 *              or an already constructed module instance.
 * @param $overload Boolean: pass true when intentionally replacing a
 *              module registered under the same name; otherwise a
 *              replacement raises an E_USER_WARNING (the module is
 *              still replaced).
 * @note String names are resolved in this order:
 *          - each prefix in $this->prefixes, in the order they were
 *            added, concatenated with $name
 *          - the literal class name
 *          - otherwise an E_USER_ERROR is triggered
 *       If your class name collides with an internal class, pass an
 *       instance instead of a name.
 * @note Classes are looked up with class_exists(); nothing is included
 *       by this method itself, so module classes must be loadable
 *       already.
 */
public function registerModule($module, $overload = false)
{
    if (is_string($module)) {
        $requested = $module;
        $class = null;

        // First prefixed class that exists wins.
        foreach ($this->prefixes as $prefix) {
            $candidate = $prefix . $requested;
            if (class_exists($candidate)) {
                $class = $candidate;
                break;
            }
        }

        // No prefixed match: fall back to the name exactly as given.
        if ($class === null) {
            if (!class_exists($requested)) {
                trigger_error(
                    $requested . ' module does not exist',
                    E_USER_ERROR
                );
                return;
            }
            $class = $requested;
        }

        $module = new $class();
    }

    // Modules are indexed by their name, so a nameless one is unusable.
    if (empty($module->name)) {
        trigger_error('Module instance of ' . get_class($module) . ' must have name');
        return;
    }

    $name = $module->name;
    if (!$overload && isset($this->registeredModules[$name])) {
        trigger_error('Overloading ' . $name . ' without explicit overload parameter', E_USER_WARNING);
    }
    $this->registeredModules[$name] = $module;
}
||
| 204 | |||
/**
 * Registers a module and queues its name in $this->userModules, which
 * setup() merges into the module list of the active doctype.
 *
 * @param $module Mixed: module name or module instance, as accepted by
 *              registerModule().
 */
public function addModule($module)
{
    $this->registerModule($module);
    // Only names are stored; an instance is reduced to its ->name.
    $this->userModules[] = is_object($module) ? $module->name : $module;
}
||
| 217 | |||
/**
 * Appends a class-name prefix to the list registerModule() tries when
 * resolving a string module name to a concrete class.
 *
 * @param string $prefix Prefix to prepend to module names.
 */
public function addPrefix($prefix)
{
    array_push($this->prefixes, $prefix);
}
||
| 226 | |||
/**
 * Resolves the doctype for the given configuration, activates and sets
 * up every applicable module, and builds the lookup structures
 * (injectors, element lookup, content sets, attribute collections).
 * After this has run you may use getElement() and getElements().
 *
 * @param HTMLPurifier_Config $config
 */
public function setup($config)
{
    $this->trusted = $config->get('HTML.Trusted');

    // The doctype supplies the starting module list.
    $this->doctype = $this->doctypes->make($config);
    $moduleNames = $this->doctype->modules;

    // Drop default modules that are neither explicitly allowed nor core.
    $allowed = $config->get('HTML.AllowedModules');
    $core = $config->get('HTML.CoreModules');
    if (is_array($allowed)) {
        foreach ($moduleNames as $index => $moduleName) {
            $keep = isset($core[$moduleName]) || isset($allowed[$moduleName]);
            if (!$keep) {
                unset($moduleNames[$index]);
            }
        }
    }

    // Optional modules switched on by configuration, in activation order.
    // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER
    // HTML.TargetBlank so that its post-attr-transform gets run afterwards.
    $switches = array(
        'HTML.Proprietary' => 'Proprietary',
        'HTML.SafeObject' => 'SafeObject',
        'HTML.SafeEmbed' => 'SafeEmbed',
        'HTML.SafeScripting' => 'SafeScripting',
        'HTML.Nofollow' => 'Nofollow',
        'HTML.TargetBlank' => 'TargetBlank',
        'HTML.TargetNoreferrer' => 'TargetNoreferrer',
        'HTML.TargetNoopener' => 'TargetNoopener',
    );
    foreach ($switches as $directive => $optionalModule) {
        $value = $config->get($directive);
        // SafeScripting is enabled by any value other than an empty
        // array; every other switch is a plain truthiness test.
        if ($directive === 'HTML.SafeScripting') {
            $enabled = ($value !== array());
        } else {
            $enabled = (bool)$value;
        }
        if ($enabled) {
            $moduleNames[] = $optionalModule;
        }
    }

    // Modules added through addModule() are always included.
    $moduleNames = array_merge($moduleNames, $this->userModules);

    foreach ($moduleNames as $moduleName) {
        $this->processModule($moduleName);
        $active = $this->modules[$moduleName];
        $active->setup($config);
    }

    foreach ($this->doctype->tidyModules as $tidyName) {
        $this->processModule($tidyName);
        $active = $this->modules[$tidyName];
        $active->setup($config);
    }

    // Turn each module's injector list into instances indexed by
    // injector name; non-object entries are treated as class-name suffixes.
    foreach ($this->modules as $active) {
        $byName = array();
        foreach ($active->info_injector as $injector) {
            if (!is_object($injector)) {
                $class = "HTMLPurifier_Injector_$injector";
                $injector = new $class;
            }
            $byName[$injector->name] = $injector;
        }
        $active->info_injector = $byName;
    }

    // Record, per element name, which active modules define it.
    foreach ($this->modules as $active) {
        foreach ($active->info as $element => $unusedDef) {
            if (!isset($this->elementLookup[$element])) {
                $this->elementLookup[$element] = array();
            }
            $this->elementLookup[$element][] = $active->name;
        }
    }

    // Content set assembly deals with all possible modules, not just
    // the ones deemed to be "safe".
    $this->contentSets = new HTMLPurifier_ContentSets($this->modules);

    // There is no way to directly disable a global attribute, but using
    // AllowedAttributes or simply not including the module in your
    // custom doctype should be sufficient.
    $this->attrCollections = new HTMLPurifier_AttrCollections(
        $this->attrTypes,
        $this->modules
    );
}
||
| 333 | |||
/**
 * Takes a module and adds it to the active module collection,
 * registering it if necessary.
 *
 * @param $module Mixed: module name, or a module instance. A name is
 *              registered only when it is not registered yet; an
 *              instance is always handed to registerModule() and is
 *              then tracked under its ->name.
 */
public function processModule($module)
{
    if (is_object($module)) {
        // An object cannot be used as an array key, so the instance is
        // registered first and the bookkeeping continues with its name.
        $this->registerModule($module);
        $module = $module->name;
    } elseif (!isset($this->registeredModules[$module])) {
        $this->registerModule($module);
    }
    $this->modules[$module] = $this->registeredModules[$module];
}
||
| 345 | |||
/**
 * Collects the merged definition of every element declared by the
 * active modules. Modules not marked safe are skipped unless the
 * manager is in trusted mode.
 *
 * @return Array of HTMLPurifier_ElementDef, indexed by element name
 */
public function getElements()
{
    $merged = array();
    foreach ($this->modules as $module) {
        if ($this->trusted || $module->safe) {
            foreach ($module->info as $name => $unusedDef) {
                // Each element is resolved once, on first sight.
                if (!isset($merged[$name])) {
                    $merged[$name] = $this->getElement($name);
                }
            }
        }
    }

    // getElement() yields false for an element that appeared to be
    // safe but actually wasn't; leave those duds out of the result.
    $elements = array();
    foreach ($merged as $name => $def) {
        if ($def !== false) {
            $elements[$name] = $def;
        }
    }

    return $elements;
}
||
| 376 | |||
| 377 | /** |
||
| 378 | * Retrieves a single merged element definition |
||
| 379 | * @param string $name Name of element |
||
| 380 | * @param bool $trusted Boolean trusted overriding parameter: set to true |
||
| 381 | * if you want the full version of an element |
||
| 382 | * @return HTMLPurifier_ElementDef Merged HTMLPurifier_ElementDef |
||
| 383 | * @note You may notice that modules are getting iterated over twice (once |
||
| 384 | * in getElements() and once here). This |
||
| 385 | * is because |
||
| 386 | */ |
||
| 387 | public function getElement($name, $trusted = null) |
||
| 464 | } |
||
| 465 | } |
||
| 466 | |||
| 467 | // vim: et sw=4 sts=4 |
||
| 468 |
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.