@@ -6,85 +6,85 @@ |
||
| 6 | 6 | class HTMLPurifier_DefinitionCacheFactory |
| 7 | 7 | { |
| 8 | 8 | |
| 9 | - protected $caches = array('Serializer' => array()); |
|
| 10 | - protected $implementations = array(); |
|
| 11 | - protected $decorators = array(); |
|
| 9 | + protected $caches = array('Serializer' => array()); |
|
| 10 | + protected $implementations = array(); |
|
| 11 | + protected $decorators = array(); |
|
| 12 | 12 | |
| 13 | - /** |
|
| 14 | - * Initialize default decorators |
|
| 15 | - */ |
|
| 16 | - public function setup() { |
|
| 17 | - $this->addDecorator('Cleanup'); |
|
| 18 | - } |
|
| 13 | + /** |
|
| 14 | + * Initialize default decorators |
|
| 15 | + */ |
|
| 16 | + public function setup() { |
|
| 17 | + $this->addDecorator('Cleanup'); |
|
| 18 | + } |
|
| 19 | 19 | |
| 20 | - /** |
|
| 21 | - * Retrieves an instance of global definition cache factory. |
|
| 22 | - */ |
|
| 23 | - public static function instance($prototype = null) { |
|
| 24 | - static $instance; |
|
| 25 | - if ($prototype !== null) { |
|
| 26 | - $instance = $prototype; |
|
| 27 | - } elseif ($instance === null || $prototype === true) { |
|
| 28 | - $instance = new HTMLPurifier_DefinitionCacheFactory(); |
|
| 29 | - $instance->setup(); |
|
| 30 | - } |
|
| 31 | - return $instance; |
|
| 32 | - } |
|
| 20 | + /** |
|
| 21 | + * Retrieves an instance of global definition cache factory. |
|
| 22 | + */ |
|
| 23 | + public static function instance($prototype = null) { |
|
| 24 | + static $instance; |
|
| 25 | + if ($prototype !== null) { |
|
| 26 | + $instance = $prototype; |
|
| 27 | + } elseif ($instance === null || $prototype === true) { |
|
| 28 | + $instance = new HTMLPurifier_DefinitionCacheFactory(); |
|
| 29 | + $instance->setup(); |
|
| 30 | + } |
|
| 31 | + return $instance; |
|
| 32 | + } |
|
| 33 | 33 | |
| 34 | - /** |
|
| 35 | - * Registers a new definition cache object |
|
| 36 | - * @param $short Short name of cache object, for reference |
|
| 37 | - * @param $long Full class name of cache object, for construction |
|
| 38 | - */ |
|
| 39 | - public function register($short, $long) { |
|
| 40 | - $this->implementations[$short] = $long; |
|
| 41 | - } |
|
| 34 | + /** |
|
| 35 | + * Registers a new definition cache object |
|
| 36 | + * @param $short Short name of cache object, for reference |
|
| 37 | + * @param $long Full class name of cache object, for construction |
|
| 38 | + */ |
|
| 39 | + public function register($short, $long) { |
|
| 40 | + $this->implementations[$short] = $long; |
|
| 41 | + } |
|
| 42 | 42 | |
| 43 | - /** |
|
| 44 | - * Factory method that creates a cache object based on configuration |
|
| 45 | - * @param $name Name of definitions handled by cache |
|
| 46 | - * @param $config Instance of HTMLPurifier_Config |
|
| 47 | - */ |
|
| 48 | - public function create($type, $config) { |
|
| 49 | - $method = $config->get('Cache.DefinitionImpl'); |
|
| 50 | - if ($method === null) { |
|
| 51 | - return new HTMLPurifier_DefinitionCache_Null($type); |
|
| 52 | - } |
|
| 53 | - if (!empty($this->caches[$method][$type])) { |
|
| 54 | - return $this->caches[$method][$type]; |
|
| 55 | - } |
|
| 56 | - if ( |
|
| 57 | - isset($this->implementations[$method]) && |
|
| 58 | - class_exists($class = $this->implementations[$method], false) |
|
| 59 | - ) { |
|
| 60 | - $cache = new $class($type); |
|
| 61 | - } else { |
|
| 62 | - if ($method != 'Serializer') { |
|
| 63 | - trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING); |
|
| 64 | - } |
|
| 65 | - $cache = new HTMLPurifier_DefinitionCache_Serializer($type); |
|
| 66 | - } |
|
| 67 | - foreach ($this->decorators as $decorator) { |
|
| 68 | - $new_cache = $decorator->decorate($cache); |
|
| 69 | - // prevent infinite recursion in PHP 4 |
|
| 70 | - unset($cache); |
|
| 71 | - $cache = $new_cache; |
|
| 72 | - } |
|
| 73 | - $this->caches[$method][$type] = $cache; |
|
| 74 | - return $this->caches[$method][$type]; |
|
| 75 | - } |
|
| 43 | + /** |
|
| 44 | + * Factory method that creates a cache object based on configuration |
|
| 45 | + * @param $name Name of definitions handled by cache |
|
| 46 | + * @param $config Instance of HTMLPurifier_Config |
|
| 47 | + */ |
|
| 48 | + public function create($type, $config) { |
|
| 49 | + $method = $config->get('Cache.DefinitionImpl'); |
|
| 50 | + if ($method === null) { |
|
| 51 | + return new HTMLPurifier_DefinitionCache_Null($type); |
|
| 52 | + } |
|
| 53 | + if (!empty($this->caches[$method][$type])) { |
|
| 54 | + return $this->caches[$method][$type]; |
|
| 55 | + } |
|
| 56 | + if ( |
|
| 57 | + isset($this->implementations[$method]) && |
|
| 58 | + class_exists($class = $this->implementations[$method], false) |
|
| 59 | + ) { |
|
| 60 | + $cache = new $class($type); |
|
| 61 | + } else { |
|
| 62 | + if ($method != 'Serializer') { |
|
| 63 | + trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING); |
|
| 64 | + } |
|
| 65 | + $cache = new HTMLPurifier_DefinitionCache_Serializer($type); |
|
| 66 | + } |
|
| 67 | + foreach ($this->decorators as $decorator) { |
|
| 68 | + $new_cache = $decorator->decorate($cache); |
|
| 69 | + // prevent infinite recursion in PHP 4 |
|
| 70 | + unset($cache); |
|
| 71 | + $cache = $new_cache; |
|
| 72 | + } |
|
| 73 | + $this->caches[$method][$type] = $cache; |
|
| 74 | + return $this->caches[$method][$type]; |
|
| 75 | + } |
|
| 76 | 76 | |
| 77 | - /** |
|
| 78 | - * Registers a decorator to add to all new cache objects |
|
| 79 | - * @param |
|
| 80 | - */ |
|
| 81 | - public function addDecorator($decorator) { |
|
| 82 | - if (is_string($decorator)) { |
|
| 83 | - $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator"; |
|
| 84 | - $decorator = new $class; |
|
| 85 | - } |
|
| 86 | - $this->decorators[$decorator->name] = $decorator; |
|
| 87 | - } |
|
| 77 | + /** |
|
| 78 | + * Registers a decorator to add to all new cache objects |
|
| 79 | + * @param |
|
| 80 | + */ |
|
| 81 | + public function addDecorator($decorator) { |
|
| 82 | + if (is_string($decorator)) { |
|
| 83 | + $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator"; |
|
| 84 | + $decorator = new $class; |
|
| 85 | + } |
|
| 86 | + $this->decorators[$decorator->name] = $decorator; |
|
| 87 | + } |
|
| 88 | 88 | |
| 89 | 89 | } |
| 90 | 90 | |
@@ -8,53 +8,53 @@ |
||
| 8 | 8 | */ |
| 9 | 9 | class HTMLPurifier_Doctype |
| 10 | 10 | { |
| 11 | - /** |
|
| 12 | - * Full name of doctype |
|
| 13 | - */ |
|
| 14 | - public $name; |
|
| 15 | - |
|
| 16 | - /** |
|
| 17 | - * List of standard modules (string identifiers or literal objects) |
|
| 18 | - * that this doctype uses |
|
| 19 | - */ |
|
| 20 | - public $modules = array(); |
|
| 21 | - |
|
| 22 | - /** |
|
| 23 | - * List of modules to use for tidying up code |
|
| 24 | - */ |
|
| 25 | - public $tidyModules = array(); |
|
| 26 | - |
|
| 27 | - /** |
|
| 28 | - * Is the language derived from XML (i.e. XHTML)? |
|
| 29 | - */ |
|
| 30 | - public $xml = true; |
|
| 31 | - |
|
| 32 | - /** |
|
| 33 | - * List of aliases for this doctype |
|
| 34 | - */ |
|
| 35 | - public $aliases = array(); |
|
| 36 | - |
|
| 37 | - /** |
|
| 38 | - * Public DTD identifier |
|
| 39 | - */ |
|
| 40 | - public $dtdPublic; |
|
| 41 | - |
|
| 42 | - /** |
|
| 43 | - * System DTD identifier |
|
| 44 | - */ |
|
| 45 | - public $dtdSystem; |
|
| 46 | - |
|
| 47 | - public function __construct($name = null, $xml = true, $modules = array(), |
|
| 48 | - $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
|
| 49 | - ) { |
|
| 50 | - $this->name = $name; |
|
| 51 | - $this->xml = $xml; |
|
| 52 | - $this->modules = $modules; |
|
| 53 | - $this->tidyModules = $tidyModules; |
|
| 54 | - $this->aliases = $aliases; |
|
| 55 | - $this->dtdPublic = $dtd_public; |
|
| 56 | - $this->dtdSystem = $dtd_system; |
|
| 57 | - } |
|
| 11 | + /** |
|
| 12 | + * Full name of doctype |
|
| 13 | + */ |
|
| 14 | + public $name; |
|
| 15 | + |
|
| 16 | + /** |
|
| 17 | + * List of standard modules (string identifiers or literal objects) |
|
| 18 | + * that this doctype uses |
|
| 19 | + */ |
|
| 20 | + public $modules = array(); |
|
| 21 | + |
|
| 22 | + /** |
|
| 23 | + * List of modules to use for tidying up code |
|
| 24 | + */ |
|
| 25 | + public $tidyModules = array(); |
|
| 26 | + |
|
| 27 | + /** |
|
| 28 | + * Is the language derived from XML (i.e. XHTML)? |
|
| 29 | + */ |
|
| 30 | + public $xml = true; |
|
| 31 | + |
|
| 32 | + /** |
|
| 33 | + * List of aliases for this doctype |
|
| 34 | + */ |
|
| 35 | + public $aliases = array(); |
|
| 36 | + |
|
| 37 | + /** |
|
| 38 | + * Public DTD identifier |
|
| 39 | + */ |
|
| 40 | + public $dtdPublic; |
|
| 41 | + |
|
| 42 | + /** |
|
| 43 | + * System DTD identifier |
|
| 44 | + */ |
|
| 45 | + public $dtdSystem; |
|
| 46 | + |
|
| 47 | + public function __construct($name = null, $xml = true, $modules = array(), |
|
| 48 | + $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
|
| 49 | + ) { |
|
| 50 | + $this->name = $name; |
|
| 51 | + $this->xml = $xml; |
|
| 52 | + $this->modules = $modules; |
|
| 53 | + $this->tidyModules = $tidyModules; |
|
| 54 | + $this->aliases = $aliases; |
|
| 55 | + $this->dtdPublic = $dtd_public; |
|
| 56 | + $this->dtdSystem = $dtd_system; |
|
| 57 | + } |
|
| 58 | 58 | } |
| 59 | 59 | |
| 60 | 60 | // vim: et sw=4 sts=4 |
@@ -11,172 +11,172 @@ |
||
| 11 | 11 | class HTMLPurifier_ElementDef |
| 12 | 12 | { |
| 13 | 13 | |
| 14 | - /** |
|
| 15 | - * Does the definition work by itself, or is it created solely |
|
| 16 | - * for the purpose of merging into another definition? |
|
| 17 | - */ |
|
| 18 | - public $standalone = true; |
|
| 19 | - |
|
| 20 | - /** |
|
| 21 | - * Associative array of attribute name to HTMLPurifier_AttrDef |
|
| 22 | - * @note Before being processed by HTMLPurifier_AttrCollections |
|
| 23 | - * when modules are finalized during |
|
| 24 | - * HTMLPurifier_HTMLDefinition->setup(), this array may also |
|
| 25 | - * contain an array at index 0 that indicates which attribute |
|
| 26 | - * collections to load into the full array. It may also |
|
| 27 | - * contain string indentifiers in lieu of HTMLPurifier_AttrDef, |
|
| 28 | - * see HTMLPurifier_AttrTypes on how they are expanded during |
|
| 29 | - * HTMLPurifier_HTMLDefinition->setup() processing. |
|
| 30 | - */ |
|
| 31 | - public $attr = array(); |
|
| 32 | - |
|
| 33 | - /** |
|
| 34 | - * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation |
|
| 35 | - */ |
|
| 36 | - public $attr_transform_pre = array(); |
|
| 37 | - |
|
| 38 | - /** |
|
| 39 | - * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation |
|
| 40 | - */ |
|
| 41 | - public $attr_transform_post = array(); |
|
| 42 | - |
|
| 43 | - /** |
|
| 44 | - * HTMLPurifier_ChildDef of this tag. |
|
| 45 | - */ |
|
| 46 | - public $child; |
|
| 47 | - |
|
| 48 | - /** |
|
| 49 | - * Abstract string representation of internal ChildDef rules. See |
|
| 50 | - * HTMLPurifier_ContentSets for how this is parsed and then transformed |
|
| 51 | - * into an HTMLPurifier_ChildDef. |
|
| 52 | - * @warning This is a temporary variable that is not available after |
|
| 53 | - * being processed by HTMLDefinition |
|
| 54 | - */ |
|
| 55 | - public $content_model; |
|
| 56 | - |
|
| 57 | - /** |
|
| 58 | - * Value of $child->type, used to determine which ChildDef to use, |
|
| 59 | - * used in combination with $content_model. |
|
| 60 | - * @warning This must be lowercase |
|
| 61 | - * @warning This is a temporary variable that is not available after |
|
| 62 | - * being processed by HTMLDefinition |
|
| 63 | - */ |
|
| 64 | - public $content_model_type; |
|
| 65 | - |
|
| 66 | - |
|
| 67 | - |
|
| 68 | - /** |
|
| 69 | - * Does the element have a content model (#PCDATA | Inline)*? This |
|
| 70 | - * is important for chameleon ins and del processing in |
|
| 71 | - * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't |
|
| 72 | - * have to worry about this one. |
|
| 73 | - */ |
|
| 74 | - public $descendants_are_inline = false; |
|
| 75 | - |
|
| 76 | - /** |
|
| 77 | - * List of the names of required attributes this element has. Dynamically |
|
| 78 | - * populated by HTMLPurifier_HTMLDefinition::getElement |
|
| 79 | - */ |
|
| 80 | - public $required_attr = array(); |
|
| 81 | - |
|
| 82 | - /** |
|
| 83 | - * Lookup table of tags excluded from all descendants of this tag. |
|
| 84 | - * @note SGML permits exclusions for all descendants, but this is |
|
| 85 | - * not possible with DTDs or XML Schemas. W3C has elected to |
|
| 86 | - * use complicated compositions of content_models to simulate |
|
| 87 | - * exclusion for children, but we go the simpler, SGML-style |
|
| 88 | - * route of flat-out exclusions, which correctly apply to |
|
| 89 | - * all descendants and not just children. Note that the XHTML |
|
| 90 | - * Modularization Abstract Modules are blithely unaware of such |
|
| 91 | - * distinctions. |
|
| 92 | - */ |
|
| 93 | - public $excludes = array(); |
|
| 94 | - |
|
| 95 | - /** |
|
| 96 | - * This tag is explicitly auto-closed by the following tags. |
|
| 97 | - */ |
|
| 98 | - public $autoclose = array(); |
|
| 99 | - |
|
| 100 | - /** |
|
| 101 | - * If a foreign element is found in this element, test if it is |
|
| 102 | - * allowed by this sub-element; if it is, instead of closing the |
|
| 103 | - * current element, place it inside this element. |
|
| 104 | - */ |
|
| 105 | - public $wrap; |
|
| 106 | - |
|
| 107 | - /** |
|
| 108 | - * Whether or not this is a formatting element affected by the |
|
| 109 | - * "Active Formatting Elements" algorithm. |
|
| 110 | - */ |
|
| 111 | - public $formatting; |
|
| 112 | - |
|
| 113 | - /** |
|
| 114 | - * Low-level factory constructor for creating new standalone element defs |
|
| 115 | - */ |
|
| 116 | - public static function create($content_model, $content_model_type, $attr) { |
|
| 117 | - $def = new HTMLPurifier_ElementDef(); |
|
| 118 | - $def->content_model = $content_model; |
|
| 119 | - $def->content_model_type = $content_model_type; |
|
| 120 | - $def->attr = $attr; |
|
| 121 | - return $def; |
|
| 122 | - } |
|
| 123 | - |
|
| 124 | - /** |
|
| 125 | - * Merges the values of another element definition into this one. |
|
| 126 | - * Values from the new element def take precedence if a value is |
|
| 127 | - * not mergeable. |
|
| 128 | - */ |
|
| 129 | - public function mergeIn($def) { |
|
| 130 | - |
|
| 131 | - // later keys takes precedence |
|
| 132 | - foreach($def->attr as $k => $v) { |
|
| 133 | - if ($k === 0) { |
|
| 134 | - // merge in the includes |
|
| 135 | - // sorry, no way to override an include |
|
| 136 | - foreach ($v as $v2) { |
|
| 137 | - $this->attr[0][] = $v2; |
|
| 138 | - } |
|
| 139 | - continue; |
|
| 140 | - } |
|
| 141 | - if ($v === false) { |
|
| 142 | - if (isset($this->attr[$k])) unset($this->attr[$k]); |
|
| 143 | - continue; |
|
| 144 | - } |
|
| 145 | - $this->attr[$k] = $v; |
|
| 146 | - } |
|
| 147 | - $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre); |
|
| 148 | - $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post); |
|
| 149 | - $this->_mergeAssocArray($this->excludes, $def->excludes); |
|
| 150 | - |
|
| 151 | - if(!empty($def->content_model)) { |
|
| 152 | - $this->content_model = |
|
| 153 | - str_replace("#SUPER", $this->content_model, $def->content_model); |
|
| 154 | - $this->child = false; |
|
| 155 | - } |
|
| 156 | - if(!empty($def->content_model_type)) { |
|
| 157 | - $this->content_model_type = $def->content_model_type; |
|
| 158 | - $this->child = false; |
|
| 159 | - } |
|
| 160 | - if(!is_null($def->child)) $this->child = $def->child; |
|
| 161 | - if(!is_null($def->formatting)) $this->formatting = $def->formatting; |
|
| 162 | - if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; |
|
| 163 | - |
|
| 164 | - } |
|
| 165 | - |
|
| 166 | - /** |
|
| 167 | - * Merges one array into another, removes values which equal false |
|
| 168 | - * @param $a1 Array by reference that is merged into |
|
| 169 | - * @param $a2 Array that merges into $a1 |
|
| 170 | - */ |
|
| 171 | - private function _mergeAssocArray(&$a1, $a2) { |
|
| 172 | - foreach ($a2 as $k => $v) { |
|
| 173 | - if ($v === false) { |
|
| 174 | - if (isset($a1[$k])) unset($a1[$k]); |
|
| 175 | - continue; |
|
| 176 | - } |
|
| 177 | - $a1[$k] = $v; |
|
| 178 | - } |
|
| 179 | - } |
|
| 14 | + /** |
|
| 15 | + * Does the definition work by itself, or is it created solely |
|
| 16 | + * for the purpose of merging into another definition? |
|
| 17 | + */ |
|
| 18 | + public $standalone = true; |
|
| 19 | + |
|
| 20 | + /** |
|
| 21 | + * Associative array of attribute name to HTMLPurifier_AttrDef |
|
| 22 | + * @note Before being processed by HTMLPurifier_AttrCollections |
|
| 23 | + * when modules are finalized during |
|
| 24 | + * HTMLPurifier_HTMLDefinition->setup(), this array may also |
|
| 25 | + * contain an array at index 0 that indicates which attribute |
|
| 26 | + * collections to load into the full array. It may also |
|
| 27 | + * contain string indentifiers in lieu of HTMLPurifier_AttrDef, |
|
| 28 | + * see HTMLPurifier_AttrTypes on how they are expanded during |
|
| 29 | + * HTMLPurifier_HTMLDefinition->setup() processing. |
|
| 30 | + */ |
|
| 31 | + public $attr = array(); |
|
| 32 | + |
|
| 33 | + /** |
|
| 34 | + * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation |
|
| 35 | + */ |
|
| 36 | + public $attr_transform_pre = array(); |
|
| 37 | + |
|
| 38 | + /** |
|
| 39 | + * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation |
|
| 40 | + */ |
|
| 41 | + public $attr_transform_post = array(); |
|
| 42 | + |
|
| 43 | + /** |
|
| 44 | + * HTMLPurifier_ChildDef of this tag. |
|
| 45 | + */ |
|
| 46 | + public $child; |
|
| 47 | + |
|
| 48 | + /** |
|
| 49 | + * Abstract string representation of internal ChildDef rules. See |
|
| 50 | + * HTMLPurifier_ContentSets for how this is parsed and then transformed |
|
| 51 | + * into an HTMLPurifier_ChildDef. |
|
| 52 | + * @warning This is a temporary variable that is not available after |
|
| 53 | + * being processed by HTMLDefinition |
|
| 54 | + */ |
|
| 55 | + public $content_model; |
|
| 56 | + |
|
| 57 | + /** |
|
| 58 | + * Value of $child->type, used to determine which ChildDef to use, |
|
| 59 | + * used in combination with $content_model. |
|
| 60 | + * @warning This must be lowercase |
|
| 61 | + * @warning This is a temporary variable that is not available after |
|
| 62 | + * being processed by HTMLDefinition |
|
| 63 | + */ |
|
| 64 | + public $content_model_type; |
|
| 65 | + |
|
| 66 | + |
|
| 67 | + |
|
| 68 | + /** |
|
| 69 | + * Does the element have a content model (#PCDATA | Inline)*? This |
|
| 70 | + * is important for chameleon ins and del processing in |
|
| 71 | + * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't |
|
| 72 | + * have to worry about this one. |
|
| 73 | + */ |
|
| 74 | + public $descendants_are_inline = false; |
|
| 75 | + |
|
| 76 | + /** |
|
| 77 | + * List of the names of required attributes this element has. Dynamically |
|
| 78 | + * populated by HTMLPurifier_HTMLDefinition::getElement |
|
| 79 | + */ |
|
| 80 | + public $required_attr = array(); |
|
| 81 | + |
|
| 82 | + /** |
|
| 83 | + * Lookup table of tags excluded from all descendants of this tag. |
|
| 84 | + * @note SGML permits exclusions for all descendants, but this is |
|
| 85 | + * not possible with DTDs or XML Schemas. W3C has elected to |
|
| 86 | + * use complicated compositions of content_models to simulate |
|
| 87 | + * exclusion for children, but we go the simpler, SGML-style |
|
| 88 | + * route of flat-out exclusions, which correctly apply to |
|
| 89 | + * all descendants and not just children. Note that the XHTML |
|
| 90 | + * Modularization Abstract Modules are blithely unaware of such |
|
| 91 | + * distinctions. |
|
| 92 | + */ |
|
| 93 | + public $excludes = array(); |
|
| 94 | + |
|
| 95 | + /** |
|
| 96 | + * This tag is explicitly auto-closed by the following tags. |
|
| 97 | + */ |
|
| 98 | + public $autoclose = array(); |
|
| 99 | + |
|
| 100 | + /** |
|
| 101 | + * If a foreign element is found in this element, test if it is |
|
| 102 | + * allowed by this sub-element; if it is, instead of closing the |
|
| 103 | + * current element, place it inside this element. |
|
| 104 | + */ |
|
| 105 | + public $wrap; |
|
| 106 | + |
|
| 107 | + /** |
|
| 108 | + * Whether or not this is a formatting element affected by the |
|
| 109 | + * "Active Formatting Elements" algorithm. |
|
| 110 | + */ |
|
| 111 | + public $formatting; |
|
| 112 | + |
|
| 113 | + /** |
|
| 114 | + * Low-level factory constructor for creating new standalone element defs |
|
| 115 | + */ |
|
| 116 | + public static function create($content_model, $content_model_type, $attr) { |
|
| 117 | + $def = new HTMLPurifier_ElementDef(); |
|
| 118 | + $def->content_model = $content_model; |
|
| 119 | + $def->content_model_type = $content_model_type; |
|
| 120 | + $def->attr = $attr; |
|
| 121 | + return $def; |
|
| 122 | + } |
|
| 123 | + |
|
| 124 | + /** |
|
| 125 | + * Merges the values of another element definition into this one. |
|
| 126 | + * Values from the new element def take precedence if a value is |
|
| 127 | + * not mergeable. |
|
| 128 | + */ |
|
| 129 | + public function mergeIn($def) { |
|
| 130 | + |
|
| 131 | + // later keys takes precedence |
|
| 132 | + foreach($def->attr as $k => $v) { |
|
| 133 | + if ($k === 0) { |
|
| 134 | + // merge in the includes |
|
| 135 | + // sorry, no way to override an include |
|
| 136 | + foreach ($v as $v2) { |
|
| 137 | + $this->attr[0][] = $v2; |
|
| 138 | + } |
|
| 139 | + continue; |
|
| 140 | + } |
|
| 141 | + if ($v === false) { |
|
| 142 | + if (isset($this->attr[$k])) unset($this->attr[$k]); |
|
| 143 | + continue; |
|
| 144 | + } |
|
| 145 | + $this->attr[$k] = $v; |
|
| 146 | + } |
|
| 147 | + $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre); |
|
| 148 | + $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post); |
|
| 149 | + $this->_mergeAssocArray($this->excludes, $def->excludes); |
|
| 150 | + |
|
| 151 | + if(!empty($def->content_model)) { |
|
| 152 | + $this->content_model = |
|
| 153 | + str_replace("#SUPER", $this->content_model, $def->content_model); |
|
| 154 | + $this->child = false; |
|
| 155 | + } |
|
| 156 | + if(!empty($def->content_model_type)) { |
|
| 157 | + $this->content_model_type = $def->content_model_type; |
|
| 158 | + $this->child = false; |
|
| 159 | + } |
|
| 160 | + if(!is_null($def->child)) $this->child = $def->child; |
|
| 161 | + if(!is_null($def->formatting)) $this->formatting = $def->formatting; |
|
| 162 | + if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; |
|
| 163 | + |
|
| 164 | + } |
|
| 165 | + |
|
| 166 | + /** |
|
| 167 | + * Merges one array into another, removes values which equal false |
|
| 168 | + * @param $a1 Array by reference that is merged into |
|
| 169 | + * @param $a2 Array that merges into $a1 |
|
| 170 | + */ |
|
| 171 | + private function _mergeAssocArray(&$a1, $a2) { |
|
| 172 | + foreach ($a2 as $k => $v) { |
|
| 173 | + if ($v === false) { |
|
| 174 | + if (isset($a1[$k])) unset($a1[$k]); |
|
| 175 | + continue; |
|
| 176 | + } |
|
| 177 | + $a1[$k] = $v; |
|
| 178 | + } |
|
| 179 | + } |
|
| 180 | 180 | |
| 181 | 181 | } |
| 182 | 182 | |
@@ -129,7 +129,7 @@ discard block |
||
| 129 | 129 | public function mergeIn($def) { |
| 130 | 130 | |
| 131 | 131 | // later keys takes precedence |
| 132 | - foreach($def->attr as $k => $v) { |
|
| 132 | + foreach ($def->attr as $k => $v) { |
|
| 133 | 133 | if ($k === 0) { |
| 134 | 134 | // merge in the includes |
| 135 | 135 | // sorry, no way to override an include |
@@ -148,18 +148,18 @@ discard block |
||
| 148 | 148 | $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post); |
| 149 | 149 | $this->_mergeAssocArray($this->excludes, $def->excludes); |
| 150 | 150 | |
| 151 | - if(!empty($def->content_model)) { |
|
| 151 | + if (!empty($def->content_model)) { |
|
| 152 | 152 | $this->content_model = |
| 153 | 153 | str_replace("#SUPER", $this->content_model, $def->content_model); |
| 154 | 154 | $this->child = false; |
| 155 | 155 | } |
| 156 | - if(!empty($def->content_model_type)) { |
|
| 156 | + if (!empty($def->content_model_type)) { |
|
| 157 | 157 | $this->content_model_type = $def->content_model_type; |
| 158 | 158 | $this->child = false; |
| 159 | 159 | } |
| 160 | - if(!is_null($def->child)) $this->child = $def->child; |
|
| 161 | - if(!is_null($def->formatting)) $this->formatting = $def->formatting; |
|
| 162 | - if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; |
|
| 160 | + if (!is_null($def->child)) $this->child = $def->child; |
|
| 161 | + if (!is_null($def->formatting)) $this->formatting = $def->formatting; |
|
| 162 | + if ($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; |
|
| 163 | 163 | |
| 164 | 164 | } |
| 165 | 165 | |
@@ -139,7 +139,9 @@ discard block |
||
| 139 | 139 | continue; |
| 140 | 140 | } |
| 141 | 141 | if ($v === false) { |
| 142 | - if (isset($this->attr[$k])) unset($this->attr[$k]); |
|
| 142 | + if (isset($this->attr[$k])) { |
|
| 143 | + unset($this->attr[$k]); |
|
| 144 | + } |
|
| 143 | 145 | continue; |
| 144 | 146 | } |
| 145 | 147 | $this->attr[$k] = $v; |
@@ -157,9 +159,15 @@ discard block |
||
| 157 | 159 | $this->content_model_type = $def->content_model_type; |
| 158 | 160 | $this->child = false; |
| 159 | 161 | } |
| 160 | - if(!is_null($def->child)) $this->child = $def->child; |
|
| 161 | - if(!is_null($def->formatting)) $this->formatting = $def->formatting; |
|
| 162 | - if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; |
|
| 162 | + if(!is_null($def->child)) { |
|
| 163 | + $this->child = $def->child; |
|
| 164 | + } |
|
| 165 | + if(!is_null($def->formatting)) { |
|
| 166 | + $this->formatting = $def->formatting; |
|
| 167 | + } |
|
| 168 | + if($def->descendants_are_inline) { |
|
| 169 | + $this->descendants_are_inline = $def->descendants_are_inline; |
|
| 170 | + } |
|
| 163 | 171 | |
| 164 | 172 | } |
| 165 | 173 | |
@@ -171,7 +179,9 @@ discard block |
||
| 171 | 179 | private function _mergeAssocArray(&$a1, $a2) { |
| 172 | 180 | foreach ($a2 as $k => $v) { |
| 173 | 181 | if ($v === false) { |
| 174 | - if (isset($a1[$k])) unset($a1[$k]); |
|
| 182 | + if (isset($a1[$k])) { |
|
| 183 | + unset($a1[$k]); |
|
| 184 | + } |
|
| 175 | 185 | continue; |
| 176 | 186 | } |
| 177 | 187 | $a1[$k] = $v; |
@@ -5,39 +5,39 @@ |
||
| 5 | 5 | */ |
| 6 | 6 | class HTMLPurifier_EntityLookup { |
| 7 | 7 | |
| 8 | - /** |
|
| 9 | - * Assoc array of entity name to character represented. |
|
| 10 | - */ |
|
| 11 | - public $table; |
|
| 8 | + /** |
|
| 9 | + * Assoc array of entity name to character represented. |
|
| 10 | + */ |
|
| 11 | + public $table; |
|
| 12 | 12 | |
| 13 | - /** |
|
| 14 | - * Sets up the entity lookup table from the serialized file contents. |
|
| 15 | - * @note The serialized contents are versioned, but were generated |
|
| 16 | - * using the maintenance script generate_entity_file.php |
|
| 17 | - * @warning This is not in constructor to help enforce the Singleton |
|
| 18 | - */ |
|
| 19 | - public function setup($file = false) { |
|
| 20 | - if (!$file) { |
|
| 21 | - $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser'; |
|
| 22 | - } |
|
| 23 | - $this->table = unserialize(file_get_contents($file)); |
|
| 24 | - } |
|
| 13 | + /** |
|
| 14 | + * Sets up the entity lookup table from the serialized file contents. |
|
| 15 | + * @note The serialized contents are versioned, but were generated |
|
| 16 | + * using the maintenance script generate_entity_file.php |
|
| 17 | + * @warning This is not in constructor to help enforce the Singleton |
|
| 18 | + */ |
|
| 19 | + public function setup($file = false) { |
|
| 20 | + if (!$file) { |
|
| 21 | + $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser'; |
|
| 22 | + } |
|
| 23 | + $this->table = unserialize(file_get_contents($file)); |
|
| 24 | + } |
|
| 25 | 25 | |
| 26 | - /** |
|
| 27 | - * Retrieves sole instance of the object. |
|
| 28 | - * @param Optional prototype of custom lookup table to overload with. |
|
| 29 | - */ |
|
| 30 | - public static function instance($prototype = false) { |
|
| 31 | - // no references, since PHP doesn't copy unless modified |
|
| 32 | - static $instance = null; |
|
| 33 | - if ($prototype) { |
|
| 34 | - $instance = $prototype; |
|
| 35 | - } elseif (!$instance) { |
|
| 36 | - $instance = new HTMLPurifier_EntityLookup(); |
|
| 37 | - $instance->setup(); |
|
| 38 | - } |
|
| 39 | - return $instance; |
|
| 40 | - } |
|
| 26 | + /** |
|
| 27 | + * Retrieves sole instance of the object. |
|
| 28 | + * @param Optional prototype of custom lookup table to overload with. |
|
| 29 | + */ |
|
| 30 | + public static function instance($prototype = false) { |
|
| 31 | + // no references, since PHP doesn't copy unless modified |
|
| 32 | + static $instance = null; |
|
| 33 | + if ($prototype) { |
|
| 34 | + $instance = $prototype; |
|
| 35 | + } elseif (!$instance) { |
|
| 36 | + $instance = new HTMLPurifier_EntityLookup(); |
|
| 37 | + $instance->setup(); |
|
| 38 | + } |
|
| 39 | + return $instance; |
|
| 40 | + } |
|
| 41 | 41 | |
| 42 | 42 | } |
| 43 | 43 | |
@@ -18,7 +18,7 @@ |
||
| 18 | 18 | */ |
| 19 | 19 | public function setup($file = false) { |
| 20 | 20 | if (!$file) { |
| 21 | - $file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser'; |
|
| 21 | + $file = HTMLPURIFIER_PREFIX.'/HTMLPurifier/EntityLookup/entities.ser'; |
|
| 22 | 22 | } |
| 23 | 23 | $this->table = unserialize(file_get_contents($file)); |
| 24 | 24 | } |
@@ -10,134 +10,134 @@ |
||
| 10 | 10 | class HTMLPurifier_EntityParser |
| 11 | 11 | { |
| 12 | 12 | |
| 13 | - /** |
|
| 14 | - * Reference to entity lookup table. |
|
| 15 | - */ |
|
| 16 | - protected $_entity_lookup; |
|
| 17 | - |
|
| 18 | - /** |
|
| 19 | - * Callback regex string for parsing entities. |
|
| 20 | - */ |
|
| 21 | - protected $_substituteEntitiesRegex = |
|
| 13 | + /** |
|
| 14 | + * Reference to entity lookup table. |
|
| 15 | + */ |
|
| 16 | + protected $_entity_lookup; |
|
| 17 | + |
|
| 18 | + /** |
|
| 19 | + * Callback regex string for parsing entities. |
|
| 20 | + */ |
|
| 21 | + protected $_substituteEntitiesRegex = |
|
| 22 | 22 | '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/'; |
| 23 | 23 | // 1. hex 2. dec 3. string (XML style) |
| 24 | 24 | |
| 25 | 25 | |
| 26 | - /** |
|
| 27 | - * Decimal to parsed string conversion table for special entities. |
|
| 28 | - */ |
|
| 29 | - protected $_special_dec2str = |
|
| 30 | - array( |
|
| 31 | - 34 => '"', |
|
| 32 | - 38 => '&', |
|
| 33 | - 39 => "'", |
|
| 34 | - 60 => '<', |
|
| 35 | - 62 => '>' |
|
| 36 | - ); |
|
| 37 | - |
|
| 38 | - /** |
|
| 39 | - * Stripped entity names to decimal conversion table for special entities. |
|
| 40 | - */ |
|
| 41 | - protected $_special_ent2dec = |
|
| 42 | - array( |
|
| 43 | - 'quot' => 34, |
|
| 44 | - 'amp' => 38, |
|
| 45 | - 'lt' => 60, |
|
| 46 | - 'gt' => 62 |
|
| 47 | - ); |
|
| 48 | - |
|
| 49 | - /** |
|
| 50 | - * Substitutes non-special entities with their parsed equivalents. Since |
|
| 51 | - * running this whenever you have parsed character is t3h 5uck, we run |
|
| 52 | - * it before everything else. |
|
| 53 | - * |
|
| 54 | - * @param $string String to have non-special entities parsed. |
|
| 55 | - * @returns Parsed string. |
|
| 56 | - */ |
|
| 57 | - public function substituteNonSpecialEntities($string) { |
|
| 58 | - // it will try to detect missing semicolons, but don't rely on it |
|
| 59 | - return preg_replace_callback( |
|
| 60 | - $this->_substituteEntitiesRegex, |
|
| 61 | - array($this, 'nonSpecialEntityCallback'), |
|
| 62 | - $string |
|
| 63 | - ); |
|
| 64 | - } |
|
| 65 | - |
|
| 66 | - /** |
|
| 67 | - * Callback function for substituteNonSpecialEntities() that does the work. |
|
| 68 | - * |
|
| 69 | - * @param $matches PCRE matches array, with 0 the entire match, and |
|
| 70 | - * either index 1, 2 or 3 set with a hex value, dec value, |
|
| 71 | - * or string (respectively). |
|
| 72 | - * @returns Replacement string. |
|
| 73 | - */ |
|
| 74 | - |
|
| 75 | - protected function nonSpecialEntityCallback($matches) { |
|
| 76 | - // replaces all but big five |
|
| 77 | - $entity = $matches[0]; |
|
| 78 | - $is_num = (@$matches[0][1] === '#'); |
|
| 79 | - if ($is_num) { |
|
| 80 | - $is_hex = (@$entity[2] === 'x'); |
|
| 81 | - $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; |
|
| 82 | - |
|
| 83 | - // abort for special characters |
|
| 84 | - if (isset($this->_special_dec2str[$code])) return $entity; |
|
| 85 | - |
|
| 86 | - return HTMLPurifier_Encoder::unichr($code); |
|
| 87 | - } else { |
|
| 88 | - if (isset($this->_special_ent2dec[$matches[3]])) return $entity; |
|
| 89 | - if (!$this->_entity_lookup) { |
|
| 90 | - $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); |
|
| 91 | - } |
|
| 92 | - if (isset($this->_entity_lookup->table[$matches[3]])) { |
|
| 93 | - return $this->_entity_lookup->table[$matches[3]]; |
|
| 94 | - } else { |
|
| 95 | - return $entity; |
|
| 96 | - } |
|
| 97 | - } |
|
| 98 | - } |
|
| 99 | - |
|
| 100 | - /** |
|
| 101 | - * Substitutes only special entities with their parsed equivalents. |
|
| 102 | - * |
|
| 103 | - * @notice We try to avoid calling this function because otherwise, it |
|
| 104 | - * would have to be called a lot (for every parsed section). |
|
| 105 | - * |
|
| 106 | - * @param $string String to have non-special entities parsed. |
|
| 107 | - * @returns Parsed string. |
|
| 108 | - */ |
|
| 109 | - public function substituteSpecialEntities($string) { |
|
| 110 | - return preg_replace_callback( |
|
| 111 | - $this->_substituteEntitiesRegex, |
|
| 112 | - array($this, 'specialEntityCallback'), |
|
| 113 | - $string); |
|
| 114 | - } |
|
| 115 | - |
|
| 116 | - /** |
|
| 117 | - * Callback function for substituteSpecialEntities() that does the work. |
|
| 118 | - * |
|
| 119 | - * This callback has same syntax as nonSpecialEntityCallback(). |
|
| 120 | - * |
|
| 121 | - * @param $matches PCRE-style matches array, with 0 the entire match, and |
|
| 122 | - * either index 1, 2 or 3 set with a hex value, dec value, |
|
| 123 | - * or string (respectively). |
|
| 124 | - * @returns Replacement string. |
|
| 125 | - */ |
|
| 126 | - protected function specialEntityCallback($matches) { |
|
| 127 | - $entity = $matches[0]; |
|
| 128 | - $is_num = (@$matches[0][1] === '#'); |
|
| 129 | - if ($is_num) { |
|
| 130 | - $is_hex = (@$entity[2] === 'x'); |
|
| 131 | - $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; |
|
| 132 | - return isset($this->_special_dec2str[$int]) ? |
|
| 133 | - $this->_special_dec2str[$int] : |
|
| 134 | - $entity; |
|
| 135 | - } else { |
|
| 136 | - return isset($this->_special_ent2dec[$matches[3]]) ? |
|
| 137 | - $this->_special_ent2dec[$matches[3]] : |
|
| 138 | - $entity; |
|
| 139 | - } |
|
| 140 | - } |
|
| 26 | + /** |
|
| 27 | + * Decimal to parsed string conversion table for special entities. |
|
| 28 | + */ |
|
| 29 | + protected $_special_dec2str = |
|
| 30 | + array( |
|
| 31 | + 34 => '"', |
|
| 32 | + 38 => '&', |
|
| 33 | + 39 => "'", |
|
| 34 | + 60 => '<', |
|
| 35 | + 62 => '>' |
|
| 36 | + ); |
|
| 37 | + |
|
| 38 | + /** |
|
| 39 | + * Stripped entity names to decimal conversion table for special entities. |
|
| 40 | + */ |
|
| 41 | + protected $_special_ent2dec = |
|
| 42 | + array( |
|
| 43 | + 'quot' => 34, |
|
| 44 | + 'amp' => 38, |
|
| 45 | + 'lt' => 60, |
|
| 46 | + 'gt' => 62 |
|
| 47 | + ); |
|
| 48 | + |
|
| 49 | + /** |
|
| 50 | + * Substitutes non-special entities with their parsed equivalents. Since |
|
| 51 | + * running this whenever you have parsed character is t3h 5uck, we run |
|
| 52 | + * it before everything else. |
|
| 53 | + * |
|
| 54 | + * @param $string String to have non-special entities parsed. |
|
| 55 | + * @returns Parsed string. |
|
| 56 | + */ |
|
| 57 | + public function substituteNonSpecialEntities($string) { |
|
| 58 | + // it will try to detect missing semicolons, but don't rely on it |
|
| 59 | + return preg_replace_callback( |
|
| 60 | + $this->_substituteEntitiesRegex, |
|
| 61 | + array($this, 'nonSpecialEntityCallback'), |
|
| 62 | + $string |
|
| 63 | + ); |
|
| 64 | + } |
|
| 65 | + |
|
| 66 | + /** |
|
| 67 | + * Callback function for substituteNonSpecialEntities() that does the work. |
|
| 68 | + * |
|
| 69 | + * @param $matches PCRE matches array, with 0 the entire match, and |
|
| 70 | + * either index 1, 2 or 3 set with a hex value, dec value, |
|
| 71 | + * or string (respectively). |
|
| 72 | + * @returns Replacement string. |
|
| 73 | + */ |
|
| 74 | + |
|
| 75 | + protected function nonSpecialEntityCallback($matches) { |
|
| 76 | + // replaces all but big five |
|
| 77 | + $entity = $matches[0]; |
|
| 78 | + $is_num = (@$matches[0][1] === '#'); |
|
| 79 | + if ($is_num) { |
|
| 80 | + $is_hex = (@$entity[2] === 'x'); |
|
| 81 | + $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; |
|
| 82 | + |
|
| 83 | + // abort for special characters |
|
| 84 | + if (isset($this->_special_dec2str[$code])) return $entity; |
|
| 85 | + |
|
| 86 | + return HTMLPurifier_Encoder::unichr($code); |
|
| 87 | + } else { |
|
| 88 | + if (isset($this->_special_ent2dec[$matches[3]])) return $entity; |
|
| 89 | + if (!$this->_entity_lookup) { |
|
| 90 | + $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); |
|
| 91 | + } |
|
| 92 | + if (isset($this->_entity_lookup->table[$matches[3]])) { |
|
| 93 | + return $this->_entity_lookup->table[$matches[3]]; |
|
| 94 | + } else { |
|
| 95 | + return $entity; |
|
| 96 | + } |
|
| 97 | + } |
|
| 98 | + } |
|
| 99 | + |
|
| 100 | + /** |
|
| 101 | + * Substitutes only special entities with their parsed equivalents. |
|
| 102 | + * |
|
| 103 | + * @notice We try to avoid calling this function because otherwise, it |
|
| 104 | + * would have to be called a lot (for every parsed section). |
|
| 105 | + * |
|
| 106 | + * @param $string String to have non-special entities parsed. |
|
| 107 | + * @returns Parsed string. |
|
| 108 | + */ |
|
| 109 | + public function substituteSpecialEntities($string) { |
|
| 110 | + return preg_replace_callback( |
|
| 111 | + $this->_substituteEntitiesRegex, |
|
| 112 | + array($this, 'specialEntityCallback'), |
|
| 113 | + $string); |
|
| 114 | + } |
|
| 115 | + |
|
| 116 | + /** |
|
| 117 | + * Callback function for substituteSpecialEntities() that does the work. |
|
| 118 | + * |
|
| 119 | + * This callback has same syntax as nonSpecialEntityCallback(). |
|
| 120 | + * |
|
| 121 | + * @param $matches PCRE-style matches array, with 0 the entire match, and |
|
| 122 | + * either index 1, 2 or 3 set with a hex value, dec value, |
|
| 123 | + * or string (respectively). |
|
| 124 | + * @returns Replacement string. |
|
| 125 | + */ |
|
| 126 | + protected function specialEntityCallback($matches) { |
|
| 127 | + $entity = $matches[0]; |
|
| 128 | + $is_num = (@$matches[0][1] === '#'); |
|
| 129 | + if ($is_num) { |
|
| 130 | + $is_hex = (@$entity[2] === 'x'); |
|
| 131 | + $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; |
|
| 132 | + return isset($this->_special_dec2str[$int]) ? |
|
| 133 | + $this->_special_dec2str[$int] : |
|
| 134 | + $entity; |
|
| 135 | + } else { |
|
| 136 | + return isset($this->_special_ent2dec[$matches[3]]) ? |
|
| 137 | + $this->_special_ent2dec[$matches[3]] : |
|
| 138 | + $entity; |
|
| 139 | + } |
|
| 140 | + } |
|
| 141 | 141 | |
| 142 | 142 | } |
| 143 | 143 | |
@@ -130,12 +130,10 @@ |
||
| 130 | 130 | $is_hex = (@$entity[2] === 'x'); |
| 131 | 131 | $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; |
| 132 | 132 | return isset($this->_special_dec2str[$int]) ? |
| 133 | - $this->_special_dec2str[$int] : |
|
| 134 | - $entity; |
|
| 133 | + $this->_special_dec2str[$int] : $entity; |
|
| 135 | 134 | } else { |
| 136 | 135 | return isset($this->_special_ent2dec[$matches[3]]) ? |
| 137 | - $this->_special_ent2dec[$matches[3]] : |
|
| 138 | - $entity; |
|
| 136 | + $this->_special_ent2dec[$matches[3]] : $entity; |
|
| 139 | 137 | } |
| 140 | 138 | } |
| 141 | 139 | |
@@ -81,11 +81,15 @@ |
||
| 81 | 81 | $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; |
| 82 | 82 | |
| 83 | 83 | // abort for special characters |
| 84 | - if (isset($this->_special_dec2str[$code])) return $entity; |
|
| 84 | + if (isset($this->_special_dec2str[$code])) { |
|
| 85 | + return $entity; |
|
| 86 | + } |
|
| 85 | 87 | |
| 86 | 88 | return HTMLPurifier_Encoder::unichr($code); |
| 87 | 89 | } else { |
| 88 | - if (isset($this->_special_ent2dec[$matches[3]])) return $entity; |
|
| 90 | + if (isset($this->_special_ent2dec[$matches[3]])) { |
|
| 91 | + return $entity; |
|
| 92 | + } |
|
| 89 | 93 | if (!$this->_entity_lookup) { |
| 90 | 94 | $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); |
| 91 | 95 | } |
@@ -9,51 +9,51 @@ |
||
| 9 | 9 | class HTMLPurifier_ErrorStruct |
| 10 | 10 | { |
| 11 | 11 | |
| 12 | - /** |
|
| 13 | - * Possible values for $children first-key. Note that top-level structures |
|
| 14 | - * are automatically token-level. |
|
| 15 | - */ |
|
| 16 | - const TOKEN = 0; |
|
| 17 | - const ATTR = 1; |
|
| 18 | - const CSSPROP = 2; |
|
| 19 | - |
|
| 20 | - /** |
|
| 21 | - * Type of this struct. |
|
| 22 | - */ |
|
| 23 | - public $type; |
|
| 24 | - |
|
| 25 | - /** |
|
| 26 | - * Value of the struct we are recording errors for. There are various |
|
| 27 | - * values for this: |
|
| 28 | - * - TOKEN: Instance of HTMLPurifier_Token |
|
| 29 | - * - ATTR: array('attr-name', 'value') |
|
| 30 | - * - CSSPROP: array('prop-name', 'value') |
|
| 31 | - */ |
|
| 32 | - public $value; |
|
| 33 | - |
|
| 34 | - /** |
|
| 35 | - * Errors registered for this structure. |
|
| 36 | - */ |
|
| 37 | - public $errors = array(); |
|
| 38 | - |
|
| 39 | - /** |
|
| 40 | - * Child ErrorStructs that are from this structure. For example, a TOKEN |
|
| 41 | - * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional |
|
| 42 | - * array in structure: [TYPE]['identifier'] |
|
| 43 | - */ |
|
| 44 | - public $children = array(); |
|
| 45 | - |
|
| 46 | - public function getChild($type, $id) { |
|
| 47 | - if (!isset($this->children[$type][$id])) { |
|
| 48 | - $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); |
|
| 49 | - $this->children[$type][$id]->type = $type; |
|
| 50 | - } |
|
| 51 | - return $this->children[$type][$id]; |
|
| 52 | - } |
|
| 53 | - |
|
| 54 | - public function addError($severity, $message) { |
|
| 55 | - $this->errors[] = array($severity, $message); |
|
| 56 | - } |
|
| 12 | + /** |
|
| 13 | + * Possible values for $children first-key. Note that top-level structures |
|
| 14 | + * are automatically token-level. |
|
| 15 | + */ |
|
| 16 | + const TOKEN = 0; |
|
| 17 | + const ATTR = 1; |
|
| 18 | + const CSSPROP = 2; |
|
| 19 | + |
|
| 20 | + /** |
|
| 21 | + * Type of this struct. |
|
| 22 | + */ |
|
| 23 | + public $type; |
|
| 24 | + |
|
| 25 | + /** |
|
| 26 | + * Value of the struct we are recording errors for. There are various |
|
| 27 | + * values for this: |
|
| 28 | + * - TOKEN: Instance of HTMLPurifier_Token |
|
| 29 | + * - ATTR: array('attr-name', 'value') |
|
| 30 | + * - CSSPROP: array('prop-name', 'value') |
|
| 31 | + */ |
|
| 32 | + public $value; |
|
| 33 | + |
|
| 34 | + /** |
|
| 35 | + * Errors registered for this structure. |
|
| 36 | + */ |
|
| 37 | + public $errors = array(); |
|
| 38 | + |
|
| 39 | + /** |
|
| 40 | + * Child ErrorStructs that are from this structure. For example, a TOKEN |
|
| 41 | + * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional |
|
| 42 | + * array in structure: [TYPE]['identifier'] |
|
| 43 | + */ |
|
| 44 | + public $children = array(); |
|
| 45 | + |
|
| 46 | + public function getChild($type, $id) { |
|
| 47 | + if (!isset($this->children[$type][$id])) { |
|
| 48 | + $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); |
|
| 49 | + $this->children[$type][$id]->type = $type; |
|
| 50 | + } |
|
| 51 | + return $this->children[$type][$id]; |
|
| 52 | + } |
|
| 53 | + |
|
| 54 | + public function addError($severity, $message) { |
|
| 55 | + $this->errors[] = array($severity, $message); |
|
| 56 | + } |
|
| 57 | 57 | |
| 58 | 58 | } |
| 59 | 59 | |
@@ -22,24 +22,24 @@ |
||
| 22 | 22 | class HTMLPurifier_Filter |
| 23 | 23 | { |
| 24 | 24 | |
| 25 | - /** |
|
| 26 | - * Name of the filter for identification purposes |
|
| 27 | - */ |
|
| 28 | - public $name; |
|
| 25 | + /** |
|
| 26 | + * Name of the filter for identification purposes |
|
| 27 | + */ |
|
| 28 | + public $name; |
|
| 29 | 29 | |
| 30 | - /** |
|
| 31 | - * Pre-processor function, handles HTML before HTML Purifier |
|
| 32 | - */ |
|
| 33 | - public function preFilter($html, $config, $context) { |
|
| 34 | - return $html; |
|
| 35 | - } |
|
| 30 | + /** |
|
| 31 | + * Pre-processor function, handles HTML before HTML Purifier |
|
| 32 | + */ |
|
| 33 | + public function preFilter($html, $config, $context) { |
|
| 34 | + return $html; |
|
| 35 | + } |
|
| 36 | 36 | |
| 37 | - /** |
|
| 38 | - * Post-processor function, handles HTML after HTML Purifier |
|
| 39 | - */ |
|
| 40 | - public function postFilter($html, $config, $context) { |
|
| 41 | - return $html; |
|
| 42 | - } |
|
| 37 | + /** |
|
| 38 | + * Post-processor function, handles HTML after HTML Purifier |
|
| 39 | + */ |
|
| 40 | + public function postFilter($html, $config, $context) { |
|
| 41 | + return $html; |
|
| 42 | + } |
|
| 43 | 43 | |
| 44 | 44 | } |
| 45 | 45 | |
@@ -23,265 +23,265 @@ |
||
| 23 | 23 | class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter |
| 24 | 24 | { |
| 25 | 25 | |
| 26 | - public $name = 'ExtractStyleBlocks'; |
|
| 27 | - private $_styleMatches = array(); |
|
| 28 | - private $_tidy; |
|
| 26 | + public $name = 'ExtractStyleBlocks'; |
|
| 27 | + private $_styleMatches = array(); |
|
| 28 | + private $_tidy; |
|
| 29 | 29 | |
| 30 | - private $_id_attrdef; |
|
| 31 | - private $_class_attrdef; |
|
| 32 | - private $_enum_attrdef; |
|
| 30 | + private $_id_attrdef; |
|
| 31 | + private $_class_attrdef; |
|
| 32 | + private $_enum_attrdef; |
|
| 33 | 33 | |
| 34 | - public function __construct() { |
|
| 35 | - $this->_tidy = new csstidy(); |
|
| 36 | - $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true); |
|
| 37 | - $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident(); |
|
| 38 | - $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus')); |
|
| 39 | - } |
|
| 34 | + public function __construct() { |
|
| 35 | + $this->_tidy = new csstidy(); |
|
| 36 | + $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true); |
|
| 37 | + $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident(); |
|
| 38 | + $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus')); |
|
| 39 | + } |
|
| 40 | 40 | |
| 41 | - /** |
|
| 42 | - * Save the contents of CSS blocks to style matches |
|
| 43 | - * @param $matches preg_replace style $matches array |
|
| 44 | - */ |
|
| 45 | - protected function styleCallback($matches) { |
|
| 46 | - $this->_styleMatches[] = $matches[1]; |
|
| 47 | - } |
|
| 41 | + /** |
|
| 42 | + * Save the contents of CSS blocks to style matches |
|
| 43 | + * @param $matches preg_replace style $matches array |
|
| 44 | + */ |
|
| 45 | + protected function styleCallback($matches) { |
|
| 46 | + $this->_styleMatches[] = $matches[1]; |
|
| 47 | + } |
|
| 48 | 48 | |
| 49 | - /** |
|
| 50 | - * Removes inline <style> tags from HTML, saves them for later use |
|
| 51 | - * @todo Extend to indicate non-text/css style blocks |
|
| 52 | - */ |
|
| 53 | - public function preFilter($html, $config, $context) { |
|
| 54 | - $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl'); |
|
| 55 | - if ($tidy !== null) $this->_tidy = $tidy; |
|
| 56 | - $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html); |
|
| 57 | - $style_blocks = $this->_styleMatches; |
|
| 58 | - $this->_styleMatches = array(); // reset |
|
| 59 | - $context->register('StyleBlocks', $style_blocks); // $context must not be reused |
|
| 60 | - if ($this->_tidy) { |
|
| 61 | - foreach ($style_blocks as &$style) { |
|
| 62 | - $style = $this->cleanCSS($style, $config, $context); |
|
| 63 | - } |
|
| 64 | - } |
|
| 65 | - return $html; |
|
| 66 | - } |
|
| 49 | + /** |
|
| 50 | + * Removes inline <style> tags from HTML, saves them for later use |
|
| 51 | + * @todo Extend to indicate non-text/css style blocks |
|
| 52 | + */ |
|
| 53 | + public function preFilter($html, $config, $context) { |
|
| 54 | + $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl'); |
|
| 55 | + if ($tidy !== null) $this->_tidy = $tidy; |
|
| 56 | + $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html); |
|
| 57 | + $style_blocks = $this->_styleMatches; |
|
| 58 | + $this->_styleMatches = array(); // reset |
|
| 59 | + $context->register('StyleBlocks', $style_blocks); // $context must not be reused |
|
| 60 | + if ($this->_tidy) { |
|
| 61 | + foreach ($style_blocks as &$style) { |
|
| 62 | + $style = $this->cleanCSS($style, $config, $context); |
|
| 63 | + } |
|
| 64 | + } |
|
| 65 | + return $html; |
|
| 66 | + } |
|
| 67 | 67 | |
| 68 | - /** |
|
| 69 | - * Takes CSS (the stuff found in <style>) and cleans it. |
|
| 70 | - * @warning Requires CSSTidy <http://csstidy.sourceforge.net/> |
|
| 71 | - * @param $css CSS styling to clean |
|
| 72 | - * @param $config Instance of HTMLPurifier_Config |
|
| 73 | - * @param $context Instance of HTMLPurifier_Context |
|
| 74 | - * @return Cleaned CSS |
|
| 75 | - */ |
|
| 76 | - public function cleanCSS($css, $config, $context) { |
|
| 77 | - // prepare scope |
|
| 78 | - $scope = $config->get('Filter.ExtractStyleBlocks.Scope'); |
|
| 79 | - if ($scope !== null) { |
|
| 80 | - $scopes = array_map('trim', explode(',', $scope)); |
|
| 81 | - } else { |
|
| 82 | - $scopes = array(); |
|
| 83 | - } |
|
| 84 | - // remove comments from CSS |
|
| 85 | - $css = trim($css); |
|
| 86 | - if (strncmp('<!--', $css, 4) === 0) { |
|
| 87 | - $css = substr($css, 4); |
|
| 88 | - } |
|
| 89 | - if (strlen($css) > 3 && substr($css, -3) == '-->') { |
|
| 90 | - $css = substr($css, 0, -3); |
|
| 91 | - } |
|
| 92 | - $css = trim($css); |
|
| 93 | - set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler'); |
|
| 94 | - $this->_tidy->parse($css); |
|
| 95 | - restore_error_handler(); |
|
| 96 | - $css_definition = $config->getDefinition('CSS'); |
|
| 97 | - $html_definition = $config->getDefinition('HTML'); |
|
| 98 | - $new_css = array(); |
|
| 99 | - foreach ($this->_tidy->css as $k => $decls) { |
|
| 100 | - // $decls are all CSS declarations inside an @ selector |
|
| 101 | - $new_decls = array(); |
|
| 102 | - foreach ($decls as $selector => $style) { |
|
| 103 | - $selector = trim($selector); |
|
| 104 | - if ($selector === '') continue; // should not happen |
|
| 105 | - // Parse the selector |
|
| 106 | - // Here is the relevant part of the CSS grammar: |
|
| 107 | - // |
|
| 108 | - // ruleset |
|
| 109 | - // : selector [ ',' S* selector ]* '{' ... |
|
| 110 | - // selector |
|
| 111 | - // : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]? |
|
| 112 | - // combinator |
|
| 113 | - // : '+' S* |
|
| 114 | - // : '>' S* |
|
| 115 | - // simple_selector |
|
| 116 | - // : element_name [ HASH | class | attrib | pseudo ]* |
|
| 117 | - // | [ HASH | class | attrib | pseudo ]+ |
|
| 118 | - // element_name |
|
| 119 | - // : IDENT | '*' |
|
| 120 | - // ; |
|
| 121 | - // class |
|
| 122 | - // : '.' IDENT |
|
| 123 | - // ; |
|
| 124 | - // attrib |
|
| 125 | - // : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* |
|
| 126 | - // [ IDENT | STRING ] S* ]? ']' |
|
| 127 | - // ; |
|
| 128 | - // pseudo |
|
| 129 | - // : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ] |
|
| 130 | - // ; |
|
| 131 | - // |
|
| 132 | - // For reference, here are the relevant tokens: |
|
| 133 | - // |
|
| 134 | - // HASH #{name} |
|
| 135 | - // IDENT {ident} |
|
| 136 | - // INCLUDES == |
|
| 137 | - // DASHMATCH |= |
|
| 138 | - // STRING {string} |
|
| 139 | - // FUNCTION {ident}\( |
|
| 140 | - // |
|
| 141 | - // And the lexical scanner tokens |
|
| 142 | - // |
|
| 143 | - // name {nmchar}+ |
|
| 144 | - // nmchar [_a-z0-9-]|{nonascii}|{escape} |
|
| 145 | - // nonascii [\240-\377] |
|
| 146 | - // escape {unicode}|\\[^\r\n\f0-9a-f] |
|
| 147 | - // unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])? |
|
| 148 | - // ident -?{nmstart}{nmchar*} |
|
| 149 | - // nmstart [_a-z]|{nonascii}|{escape} |
|
| 150 | - // string {string1}|{string2} |
|
| 151 | - // string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" |
|
| 152 | - // string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\' |
|
| 153 | - // |
|
| 154 | - // We'll implement a subset (in order to reduce attack |
|
| 155 | - // surface); in particular: |
|
| 156 | - // |
|
| 157 | - // - No Unicode support |
|
| 158 | - // - No escapes support |
|
| 159 | - // - No string support (by proxy no attrib support) |
|
| 160 | - // - element_name is matched against allowed |
|
| 161 | - // elements (some people might find this |
|
| 162 | - // annoying...) |
|
| 163 | - // - Pseudo-elements one of :first-child, :link, |
|
| 164 | - // :visited, :active, :hover, :focus |
|
| 68 | + /** |
|
| 69 | + * Takes CSS (the stuff found in <style>) and cleans it. |
|
| 70 | + * @warning Requires CSSTidy <http://csstidy.sourceforge.net/> |
|
| 71 | + * @param $css CSS styling to clean |
|
| 72 | + * @param $config Instance of HTMLPurifier_Config |
|
| 73 | + * @param $context Instance of HTMLPurifier_Context |
|
| 74 | + * @return Cleaned CSS |
|
| 75 | + */ |
|
| 76 | + public function cleanCSS($css, $config, $context) { |
|
| 77 | + // prepare scope |
|
| 78 | + $scope = $config->get('Filter.ExtractStyleBlocks.Scope'); |
|
| 79 | + if ($scope !== null) { |
|
| 80 | + $scopes = array_map('trim', explode(',', $scope)); |
|
| 81 | + } else { |
|
| 82 | + $scopes = array(); |
|
| 83 | + } |
|
| 84 | + // remove comments from CSS |
|
| 85 | + $css = trim($css); |
|
| 86 | + if (strncmp('<!--', $css, 4) === 0) { |
|
| 87 | + $css = substr($css, 4); |
|
| 88 | + } |
|
| 89 | + if (strlen($css) > 3 && substr($css, -3) == '-->') { |
|
| 90 | + $css = substr($css, 0, -3); |
|
| 91 | + } |
|
| 92 | + $css = trim($css); |
|
| 93 | + set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler'); |
|
| 94 | + $this->_tidy->parse($css); |
|
| 95 | + restore_error_handler(); |
|
| 96 | + $css_definition = $config->getDefinition('CSS'); |
|
| 97 | + $html_definition = $config->getDefinition('HTML'); |
|
| 98 | + $new_css = array(); |
|
| 99 | + foreach ($this->_tidy->css as $k => $decls) { |
|
| 100 | + // $decls are all CSS declarations inside an @ selector |
|
| 101 | + $new_decls = array(); |
|
| 102 | + foreach ($decls as $selector => $style) { |
|
| 103 | + $selector = trim($selector); |
|
| 104 | + if ($selector === '') continue; // should not happen |
|
| 105 | + // Parse the selector |
|
| 106 | + // Here is the relevant part of the CSS grammar: |
|
| 107 | + // |
|
| 108 | + // ruleset |
|
| 109 | + // : selector [ ',' S* selector ]* '{' ... |
|
| 110 | + // selector |
|
| 111 | + // : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]? |
|
| 112 | + // combinator |
|
| 113 | + // : '+' S* |
|
| 114 | + // : '>' S* |
|
| 115 | + // simple_selector |
|
| 116 | + // : element_name [ HASH | class | attrib | pseudo ]* |
|
| 117 | + // | [ HASH | class | attrib | pseudo ]+ |
|
| 118 | + // element_name |
|
| 119 | + // : IDENT | '*' |
|
| 120 | + // ; |
|
| 121 | + // class |
|
| 122 | + // : '.' IDENT |
|
| 123 | + // ; |
|
| 124 | + // attrib |
|
| 125 | + // : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* |
|
| 126 | + // [ IDENT | STRING ] S* ]? ']' |
|
| 127 | + // ; |
|
| 128 | + // pseudo |
|
| 129 | + // : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ] |
|
| 130 | + // ; |
|
| 131 | + // |
|
| 132 | + // For reference, here are the relevant tokens: |
|
| 133 | + // |
|
| 134 | + // HASH #{name} |
|
| 135 | + // IDENT {ident} |
|
| 136 | + // INCLUDES == |
|
| 137 | + // DASHMATCH |= |
|
| 138 | + // STRING {string} |
|
| 139 | + // FUNCTION {ident}\( |
|
| 140 | + // |
|
| 141 | + // And the lexical scanner tokens |
|
| 142 | + // |
|
| 143 | + // name {nmchar}+ |
|
| 144 | + // nmchar [_a-z0-9-]|{nonascii}|{escape} |
|
| 145 | + // nonascii [\240-\377] |
|
| 146 | + // escape {unicode}|\\[^\r\n\f0-9a-f] |
|
| 147 | + // unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])? |
|
| 148 | + // ident -?{nmstart}{nmchar*} |
|
| 149 | + // nmstart [_a-z]|{nonascii}|{escape} |
|
| 150 | + // string {string1}|{string2} |
|
| 151 | + // string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" |
|
| 152 | + // string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\' |
|
| 153 | + // |
|
| 154 | + // We'll implement a subset (in order to reduce attack |
|
| 155 | + // surface); in particular: |
|
| 156 | + // |
|
| 157 | + // - No Unicode support |
|
| 158 | + // - No escapes support |
|
| 159 | + // - No string support (by proxy no attrib support) |
|
| 160 | + // - element_name is matched against allowed |
|
| 161 | + // elements (some people might find this |
|
| 162 | + // annoying...) |
|
| 163 | + // - Pseudo-elements one of :first-child, :link, |
|
| 164 | + // :visited, :active, :hover, :focus |
|
| 165 | 165 | |
| 166 | - // handle ruleset |
|
| 167 | - $selectors = array_map('trim', explode(',', $selector)); |
|
| 168 | - $new_selectors = array(); |
|
| 169 | - foreach ($selectors as $sel) { |
|
| 170 | - // split on +, > and spaces |
|
| 171 | - $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 172 | - // even indices are chunks, odd indices are |
|
| 173 | - // delimiters |
|
| 174 | - $nsel = null; |
|
| 175 | - $delim = null; // guaranteed to be non-null after |
|
| 176 | - // two loop iterations |
|
| 177 | - for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) { |
|
| 178 | - $x = $basic_selectors[$i]; |
|
| 179 | - if ($i % 2) { |
|
| 180 | - // delimiter |
|
| 181 | - if ($x === ' ') { |
|
| 182 | - $delim = ' '; |
|
| 183 | - } else { |
|
| 184 | - $delim = ' ' . $x . ' '; |
|
| 185 | - } |
|
| 186 | - } else { |
|
| 187 | - // simple selector |
|
| 188 | - $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 189 | - $sdelim = null; |
|
| 190 | - $nx = null; |
|
| 191 | - for ($j = 0, $cc = count($components); $j < $cc; $j ++) { |
|
| 192 | - $y = $components[$j]; |
|
| 193 | - if ($j === 0) { |
|
| 194 | - if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) { |
|
| 195 | - $nx = $y; |
|
| 196 | - } else { |
|
| 197 | - // $nx stays null; this matters |
|
| 198 | - // if we don't manage to find |
|
| 199 | - // any valid selector content, |
|
| 200 | - // in which case we ignore the |
|
| 201 | - // outer $delim |
|
| 202 | - } |
|
| 203 | - } elseif ($j % 2) { |
|
| 204 | - // set delimiter |
|
| 205 | - $sdelim = $y; |
|
| 206 | - } else { |
|
| 207 | - $attrdef = null; |
|
| 208 | - if ($sdelim === '#') { |
|
| 209 | - $attrdef = $this->_id_attrdef; |
|
| 210 | - } elseif ($sdelim === '.') { |
|
| 211 | - $attrdef = $this->_class_attrdef; |
|
| 212 | - } elseif ($sdelim === ':') { |
|
| 213 | - $attrdef = $this->_enum_attrdef; |
|
| 214 | - } else { |
|
| 215 | - throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split'); |
|
| 216 | - } |
|
| 217 | - $r = $attrdef->validate($y, $config, $context); |
|
| 218 | - if ($r !== false) { |
|
| 219 | - if ($r !== true) { |
|
| 220 | - $y = $r; |
|
| 221 | - } |
|
| 222 | - if ($nx === null) { |
|
| 223 | - $nx = ''; |
|
| 224 | - } |
|
| 225 | - $nx .= $sdelim . $y; |
|
| 226 | - } |
|
| 227 | - } |
|
| 228 | - } |
|
| 229 | - if ($nx !== null) { |
|
| 230 | - if ($nsel === null) { |
|
| 231 | - $nsel = $nx; |
|
| 232 | - } else { |
|
| 233 | - $nsel .= $delim . $nx; |
|
| 234 | - } |
|
| 235 | - } else { |
|
| 236 | - // delimiters to the left of invalid |
|
| 237 | - // basic selector ignored |
|
| 238 | - } |
|
| 239 | - } |
|
| 240 | - } |
|
| 241 | - if ($nsel !== null) { |
|
| 242 | - if (!empty($scopes)) { |
|
| 243 | - foreach ($scopes as $s) { |
|
| 244 | - $new_selectors[] = "$s $nsel"; |
|
| 245 | - } |
|
| 246 | - } else { |
|
| 247 | - $new_selectors[] = $nsel; |
|
| 248 | - } |
|
| 249 | - } |
|
| 250 | - } |
|
| 251 | - if (empty($new_selectors)) continue; |
|
| 252 | - $selector = implode(', ', $new_selectors); |
|
| 253 | - foreach ($style as $name => $value) { |
|
| 254 | - if (!isset($css_definition->info[$name])) { |
|
| 255 | - unset($style[$name]); |
|
| 256 | - continue; |
|
| 257 | - } |
|
| 258 | - $def = $css_definition->info[$name]; |
|
| 259 | - $ret = $def->validate($value, $config, $context); |
|
| 260 | - if ($ret === false) unset($style[$name]); |
|
| 261 | - else $style[$name] = $ret; |
|
| 262 | - } |
|
| 263 | - $new_decls[$selector] = $style; |
|
| 264 | - } |
|
| 265 | - $new_css[$k] = $new_decls; |
|
| 266 | - } |
|
| 267 | - // remove stuff that shouldn't be used, could be reenabled |
|
| 268 | - // after security risks are analyzed |
|
| 269 | - $this->_tidy->css = $new_css; |
|
| 270 | - $this->_tidy->import = array(); |
|
| 271 | - $this->_tidy->charset = null; |
|
| 272 | - $this->_tidy->namespace = null; |
|
| 273 | - $css = $this->_tidy->print->plain(); |
|
| 274 | - // we are going to escape any special characters <>& to ensure |
|
| 275 | - // that no funny business occurs (i.e. </style> in a font-family prop). |
|
| 276 | - if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { |
|
| 277 | - $css = str_replace( |
|
| 278 | - array('<', '>', '&'), |
|
| 279 | - array('\3C ', '\3E ', '\26 '), |
|
| 280 | - $css |
|
| 281 | - ); |
|
| 282 | - } |
|
| 283 | - return $css; |
|
| 284 | - } |
|
| 166 | + // handle ruleset |
|
| 167 | + $selectors = array_map('trim', explode(',', $selector)); |
|
| 168 | + $new_selectors = array(); |
|
| 169 | + foreach ($selectors as $sel) { |
|
| 170 | + // split on +, > and spaces |
|
| 171 | + $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 172 | + // even indices are chunks, odd indices are |
|
| 173 | + // delimiters |
|
| 174 | + $nsel = null; |
|
| 175 | + $delim = null; // guaranteed to be non-null after |
|
| 176 | + // two loop iterations |
|
| 177 | + for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) { |
|
| 178 | + $x = $basic_selectors[$i]; |
|
| 179 | + if ($i % 2) { |
|
| 180 | + // delimiter |
|
| 181 | + if ($x === ' ') { |
|
| 182 | + $delim = ' '; |
|
| 183 | + } else { |
|
| 184 | + $delim = ' ' . $x . ' '; |
|
| 185 | + } |
|
| 186 | + } else { |
|
| 187 | + // simple selector |
|
| 188 | + $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE); |
|
| 189 | + $sdelim = null; |
|
| 190 | + $nx = null; |
|
| 191 | + for ($j = 0, $cc = count($components); $j < $cc; $j ++) { |
|
| 192 | + $y = $components[$j]; |
|
| 193 | + if ($j === 0) { |
|
| 194 | + if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) { |
|
| 195 | + $nx = $y; |
|
| 196 | + } else { |
|
| 197 | + // $nx stays null; this matters |
|
| 198 | + // if we don't manage to find |
|
| 199 | + // any valid selector content, |
|
| 200 | + // in which case we ignore the |
|
| 201 | + // outer $delim |
|
| 202 | + } |
|
| 203 | + } elseif ($j % 2) { |
|
| 204 | + // set delimiter |
|
| 205 | + $sdelim = $y; |
|
| 206 | + } else { |
|
| 207 | + $attrdef = null; |
|
| 208 | + if ($sdelim === '#') { |
|
| 209 | + $attrdef = $this->_id_attrdef; |
|
| 210 | + } elseif ($sdelim === '.') { |
|
| 211 | + $attrdef = $this->_class_attrdef; |
|
| 212 | + } elseif ($sdelim === ':') { |
|
| 213 | + $attrdef = $this->_enum_attrdef; |
|
| 214 | + } else { |
|
| 215 | + throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split'); |
|
| 216 | + } |
|
| 217 | + $r = $attrdef->validate($y, $config, $context); |
|
| 218 | + if ($r !== false) { |
|
| 219 | + if ($r !== true) { |
|
| 220 | + $y = $r; |
|
| 221 | + } |
|
| 222 | + if ($nx === null) { |
|
| 223 | + $nx = ''; |
|
| 224 | + } |
|
| 225 | + $nx .= $sdelim . $y; |
|
| 226 | + } |
|
| 227 | + } |
|
| 228 | + } |
|
| 229 | + if ($nx !== null) { |
|
| 230 | + if ($nsel === null) { |
|
| 231 | + $nsel = $nx; |
|
| 232 | + } else { |
|
| 233 | + $nsel .= $delim . $nx; |
|
| 234 | + } |
|
| 235 | + } else { |
|
| 236 | + // delimiters to the left of invalid |
|
| 237 | + // basic selector ignored |
|
| 238 | + } |
|
| 239 | + } |
|
| 240 | + } |
|
| 241 | + if ($nsel !== null) { |
|
| 242 | + if (!empty($scopes)) { |
|
| 243 | + foreach ($scopes as $s) { |
|
| 244 | + $new_selectors[] = "$s $nsel"; |
|
| 245 | + } |
|
| 246 | + } else { |
|
| 247 | + $new_selectors[] = $nsel; |
|
| 248 | + } |
|
| 249 | + } |
|
| 250 | + } |
|
| 251 | + if (empty($new_selectors)) continue; |
|
| 252 | + $selector = implode(', ', $new_selectors); |
|
| 253 | + foreach ($style as $name => $value) { |
|
| 254 | + if (!isset($css_definition->info[$name])) { |
|
| 255 | + unset($style[$name]); |
|
| 256 | + continue; |
|
| 257 | + } |
|
| 258 | + $def = $css_definition->info[$name]; |
|
| 259 | + $ret = $def->validate($value, $config, $context); |
|
| 260 | + if ($ret === false) unset($style[$name]); |
|
| 261 | + else $style[$name] = $ret; |
|
| 262 | + } |
|
| 263 | + $new_decls[$selector] = $style; |
|
| 264 | + } |
|
| 265 | + $new_css[$k] = $new_decls; |
|
| 266 | + } |
|
| 267 | + // remove stuff that shouldn't be used, could be reenabled |
|
| 268 | + // after security risks are analyzed |
|
| 269 | + $this->_tidy->css = $new_css; |
|
| 270 | + $this->_tidy->import = array(); |
|
| 271 | + $this->_tidy->charset = null; |
|
| 272 | + $this->_tidy->namespace = null; |
|
| 273 | + $css = $this->_tidy->print->plain(); |
|
| 274 | + // we are going to escape any special characters <>& to ensure |
|
| 275 | + // that no funny business occurs (i.e. </style> in a font-family prop). |
|
| 276 | + if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { |
|
| 277 | + $css = str_replace( |
|
| 278 | + array('<', '>', '&'), |
|
| 279 | + array('\3C ', '\3E ', '\26 '), |
|
| 280 | + $css |
|
| 281 | + ); |
|
| 282 | + } |
|
| 283 | + return $css; |
|
| 284 | + } |
|
| 285 | 285 | |
| 286 | 286 | } |
| 287 | 287 | |
@@ -52,7 +52,9 @@ discard block |
||
| 52 | 52 | */ |
| 53 | 53 | public function preFilter($html, $config, $context) { |
| 54 | 54 | $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl'); |
| 55 | - if ($tidy !== null) $this->_tidy = $tidy; |
|
| 55 | + if ($tidy !== null) { |
|
| 56 | + $this->_tidy = $tidy; |
|
| 57 | + } |
|
| 56 | 58 | $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html); |
| 57 | 59 | $style_blocks = $this->_styleMatches; |
| 58 | 60 | $this->_styleMatches = array(); // reset |
@@ -101,7 +103,10 @@ discard block |
||
| 101 | 103 | $new_decls = array(); |
| 102 | 104 | foreach ($decls as $selector => $style) { |
| 103 | 105 | $selector = trim($selector); |
| 104 | - if ($selector === '') continue; // should not happen |
|
| 106 | + if ($selector === '') { |
|
| 107 | + continue; |
|
| 108 | + } |
|
| 109 | + // should not happen |
|
| 105 | 110 | // Parse the selector |
| 106 | 111 | // Here is the relevant part of the CSS grammar: |
| 107 | 112 | // |
@@ -248,7 +253,9 @@ discard block |
||
| 248 | 253 | } |
| 249 | 254 | } |
| 250 | 255 | } |
| 251 | - if (empty($new_selectors)) continue; |
|
| 256 | + if (empty($new_selectors)) { |
|
| 257 | + continue; |
|
| 258 | + } |
|
| 252 | 259 | $selector = implode(', ', $new_selectors); |
| 253 | 260 | foreach ($style as $name => $value) { |
| 254 | 261 | if (!isset($css_definition->info[$name])) { |
@@ -257,8 +264,11 @@ discard block |
||
| 257 | 264 | } |
| 258 | 265 | $def = $css_definition->info[$name]; |
| 259 | 266 | $ret = $def->validate($value, $config, $context); |
| 260 | - if ($ret === false) unset($style[$name]); |
|
| 261 | - else $style[$name] = $ret; |
|
| 267 | + if ($ret === false) { |
|
| 268 | + unset($style[$name]); |
|
| 269 | + } else { |
|
| 270 | + $style[$name] = $ret; |
|
| 271 | + } |
|
| 262 | 272 | } |
| 263 | 273 | $new_decls[$selector] = $style; |
| 264 | 274 | } |
@@ -181,14 +181,14 @@ discard block |
||
| 181 | 181 | if ($x === ' ') { |
| 182 | 182 | $delim = ' '; |
| 183 | 183 | } else { |
| 184 | - $delim = ' ' . $x . ' '; |
|
| 184 | + $delim = ' '.$x.' '; |
|
| 185 | 185 | } |
| 186 | 186 | } else { |
| 187 | 187 | // simple selector |
| 188 | 188 | $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE); |
| 189 | 189 | $sdelim = null; |
| 190 | 190 | $nx = null; |
| 191 | - for ($j = 0, $cc = count($components); $j < $cc; $j ++) { |
|
| 191 | + for ($j = 0, $cc = count($components); $j < $cc; $j++) { |
|
| 192 | 192 | $y = $components[$j]; |
| 193 | 193 | if ($j === 0) { |
| 194 | 194 | if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) { |
@@ -222,7 +222,7 @@ discard block |
||
| 222 | 222 | if ($nx === null) { |
| 223 | 223 | $nx = ''; |
| 224 | 224 | } |
| 225 | - $nx .= $sdelim . $y; |
|
| 225 | + $nx .= $sdelim.$y; |
|
| 226 | 226 | } |
| 227 | 227 | } |
| 228 | 228 | } |
@@ -230,7 +230,7 @@ discard block |
||
| 230 | 230 | if ($nsel === null) { |
| 231 | 231 | $nsel = $nx; |
| 232 | 232 | } else { |
| 233 | - $nsel .= $delim . $nx; |
|
| 233 | + $nsel .= $delim.$nx; |
|
| 234 | 234 | } |
| 235 | 235 | } else { |
| 236 | 236 | // delimiters to the left of invalid |
@@ -275,7 +275,7 @@ discard block |
||
| 275 | 275 | // that no funny business occurs (i.e. </style> in a font-family prop). |
| 276 | 276 | if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { |
| 277 | 277 | $css = str_replace( |
| 278 | - array('<', '>', '&'), |
|
| 278 | + array('<', '>', '&'), |
|
| 279 | 279 | array('\3C ', '\3E ', '\26 '), |
| 280 | 280 | $css |
| 281 | 281 | ); |
@@ -3,37 +3,37 @@ |
||
| 3 | 3 | class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter |
| 4 | 4 | { |
| 5 | 5 | |
| 6 | - public $name = 'YouTube'; |
|
| 7 | - |
|
| 8 | - public function preFilter($html, $config, $context) { |
|
| 9 | - $pre_regex = '#<object[^>]+>.+?'. |
|
| 10 | - 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s'; |
|
| 11 | - $pre_replace = '<span class="youtube-embed">\1</span>'; |
|
| 12 | - return preg_replace($pre_regex, $pre_replace, $html); |
|
| 13 | - } |
|
| 14 | - |
|
| 15 | - public function postFilter($html, $config, $context) { |
|
| 16 | - $post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#'; |
|
| 17 | - return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); |
|
| 18 | - } |
|
| 19 | - |
|
| 20 | - protected function armorUrl($url) { |
|
| 21 | - return str_replace('--', '-&#45;', $url); |
|
| 22 | - } |
|
| 23 | - |
|
| 24 | - protected function postFilterCallback($matches) { |
|
| 25 | - $url = $this->armorUrl($matches[1]); |
|
| 26 | - return '<object width="425" height="350" type="application/x-shockwave-flash" '. |
|
| 27 | - 'data="http://www.youtube.com/'.$url.'">'. |
|
| 28 | - '<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'. |
|
| 29 | - '<!--[if IE]>'. |
|
| 30 | - '<embed src="http://www.youtube.com/'.$url.'"'. |
|
| 31 | - 'type="application/x-shockwave-flash"'. |
|
| 32 | - 'wmode="transparent" width="425" height="350" />'. |
|
| 33 | - '<![endif]-->'. |
|
| 34 | - '</object>'; |
|
| 35 | - |
|
| 36 | - } |
|
| 6 | + public $name = 'YouTube'; |
|
| 7 | + |
|
| 8 | + public function preFilter($html, $config, $context) { |
|
| 9 | + $pre_regex = '#<object[^>]+>.+?'. |
|
| 10 | + 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s'; |
|
| 11 | + $pre_replace = '<span class="youtube-embed">\1</span>'; |
|
| 12 | + return preg_replace($pre_regex, $pre_replace, $html); |
|
| 13 | + } |
|
| 14 | + |
|
| 15 | + public function postFilter($html, $config, $context) { |
|
| 16 | + $post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#'; |
|
| 17 | + return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); |
|
| 18 | + } |
|
| 19 | + |
|
| 20 | + protected function armorUrl($url) { |
|
| 21 | + return str_replace('--', '-&#45;', $url); |
|
| 22 | + } |
|
| 23 | + |
|
| 24 | + protected function postFilterCallback($matches) { |
|
| 25 | + $url = $this->armorUrl($matches[1]); |
|
| 26 | + return '<object width="425" height="350" type="application/x-shockwave-flash" '. |
|
| 27 | + 'data="http://www.youtube.com/'.$url.'">'. |
|
| 28 | + '<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'. |
|
| 29 | + '<!--[if IE]>'. |
|
| 30 | + '<embed src="http://www.youtube.com/'.$url.'"'. |
|
| 31 | + 'type="application/x-shockwave-flash"'. |
|
| 32 | + 'wmode="transparent" width="425" height="350" />'. |
|
| 33 | + '<![endif]-->'. |
|
| 34 | + '</object>'; |
|
| 35 | + |
|
| 36 | + } |
|
| 37 | 37 | } |
| 38 | 38 | |
| 39 | 39 | // vim: et sw=4 sts=4 |