Completed
Push — master ( c17796...052b15 )
by Damian
01:29
created

HTMLEditorSanitiser::addValidElements()   F

Complexity

Conditions 27
Paths 1026

Size

Total Lines 96
Code Lines 57

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 27
eloc 57
nc 1026
nop 1
dl 0
loc 96
rs 2
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace SilverStripe\Forms\HTMLEditor;
4
5
use DOMAttr;
6
use DOMElement;
7
use DOMNode;
8
use SilverStripe\Core\Injector\Injectable;
9
use SilverStripe\View\Parsers\HTMLValue;
10
use stdClass;
11
12
/**
13
 * Sanitises an HTMLValue so its contents are the elements and attributes that are whitelisted
14
 * using the same configuration as TinyMCE
15
 *
16
 * See www.tinymce.com/wiki.php/configuration:valid_elements for details on the spec of TinyMCE's
17
 * whitelist configuration
18
 */
19
class HTMLEditorSanitiser
20
{
21
    use Injectable;
22
23
    /** @var stdClass[] - $element => $rule hash for whitelist element rules where the element name isn't a pattern */
0 ignored issues
show
Documentation Bug introduced by
The doc comment [stdClass] at position 0 could not be parsed: Unknown type name '[' at position 0 in [stdClass].
Loading history...
24
    protected $elements = array();
25
    /** @var stdClass[] - Sequential list of whitelist element rules where the element name is a pattern */
0 ignored issues
show
Documentation Bug introduced by
The doc comment [stdClass] at position 0 could not be parsed: Unknown type name '[' at position 0 in [stdClass].
Loading history...
26
    protected $elementPatterns = array();
27
28
    /** @var stdClass[] - The list of attributes that apply to all further whitelisted elements added */
0 ignored issues
show
Documentation Bug introduced by
The doc comment [stdClass] at position 0 could not be parsed: Unknown type name '[' at position 0 in [stdClass].
Loading history...
29
    protected $globalAttributes = array();
30
31
    /**
     * Build a sanitiser whose whitelist mirrors the given HTMLEditorConfig
     *
     * The 'valid_elements' and 'extended_valid_elements' options are read once, here, and turned
     * into this instance's rule structures. Changing the config object afterwards does not
     * update the whitelist of an already constructed sanitiser.
     *
     * @param HTMLEditorConfig $config
     */
    public function __construct(HTMLEditorConfig $config)
    {
        // Base rules first, then the extensions, so that extended rules can replace base ones
        foreach (['valid_elements', 'extended_valid_elements'] as $optionName) {
            $rules = $config->getOption($optionName);
            if (!$rules) {
                continue;
            }

            $this->addValidElements($rules);
        }
    }
51
52
    /**
     * Given a TinyMCE pattern (close to unix glob style), create a regex that does the match
     *
     * The wildcard characters `?`, `+` and `*` become `.?`, `.+` and `.*`. Every other character
     * is escaped, so literal text in the pattern (for example `.` or the `/` delimiter) only ever
     * matches itself and can never produce an invalid or overly permissive expression.
     *
     * @param string $str The TinyMCE pattern
     * @return string The equivalent regex, anchored at both ends and delimited with `/`
     */
    protected function patternToRegex($str)
    {
        // With PREG_SPLIT_DELIM_CAPTURE the result alternates: literal, wildcard, literal, ...
        $parts = preg_split('/([?+*])/', (string) $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        $regex = '';
        foreach ($parts as $index => $part) {
            if ($index % 2) {
                // Odd positions hold a captured wildcard: "any character" with that quantifier
                $regex .= '.' . $part;
            } else {
                $regex .= preg_quote($part, '/');
            }
        }

        return '/^' . $regex . '$/';
    }
62
63
    /**
     * Given a valid_elements string, parse out the actual element and attribute rules and add to the
     * internal whitelist
     *
     * The string is a comma separated list of element rules. Each rule is an optional prefix
     * (`#`, `+` or `-`), an element name, an optional `/name` synonym and an optional
     * `[...]` list of attribute rules separated by `|`. Rules that do not fit this syntax are
     * silently skipped.
     *
     * Logic based heavily on javascript version from tiny_mce_src.js
     *
     * @param string $validElements - The valid_elements or extended_valid_elements string to add to the whitelist
     */
    protected function addValidElements($validElements)
    {
        $elementRuleRegExp = '/^([#+\-])?([^\[\/]+)(?:\/([^\[]+))?(?:\[([^\]]+)\])?$/';

        foreach (explode(',', $validElements) as $validElement) {
            if (!preg_match($elementRuleRegExp, $validElement, $matches)) {
                continue;
            }

            // Group 2 (the element name) is mandatory; the other groups are optional and
            // missing from $matches when they and everything after them did not take part
            $prefix = isset($matches[1]) ? $matches[1] : null;
            $elementName = $matches[2];
            $outputName = isset($matches[3]) ? $matches[3] : null;
            $attrData = isset($matches[4]) ? $matches[4] : null;

            $element = $this->createElementRule($prefix);

            // Attributes defined
            if ($attrData) {
                $this->addAttributeRules($element, $attrData);
            }

            $this->registerElementRule($element, $elementName, $outputName);
        }
    }

    /**
     * Create an element rule with no attribute rules of its own, seeded with the global attributes
     *
     * @param string|null $prefix The rule prefix: `#` sets paddEmpty, `-` sets removeEmpty
     * @return stdClass The new element rule
     */
    private function createElementRule($prefix)
    {
        $element = new stdClass();
        $element->attributes = [];
        $element->attributePatterns = [];

        $element->attributesRequired = [];
        $element->attributesDefault = [];
        $element->attributesForced = [];

        $element->paddEmpty = ($prefix === '#');
        $element->removeEmpty = ($prefix === '-');

        // Copy attributes from global rule into current rule
        if ($this->globalAttributes) {
            $element->attributes = array_merge($element->attributes, $this->globalAttributes);
        }

        return $element;
    }

    /**
     * Parse the `|` separated attribute rules of one element rule and record them on that rule
     *
     * Each attribute rule is an optional type (`!` required, `-` denied), a name (where `::`
     * stands for a literal `:`) and optionally `=value` (default), `:value` (forced) or
     * `<a?b?c` (list of valid values separated by `?`).
     *
     * @param stdClass $element The element rule to modify
     * @param string $attrData The content of the `[...]` part of the element rule
     */
    private function addAttributeRules(stdClass $element, $attrData)
    {
        $attrRuleRegExp = '/^([!\-])?(\w+::\w+|[^=:<]+)?(?:([=:<])(.*))?$/';

        foreach (explode('|', $attrData) as $attrRule) {
            if (!preg_match($attrRuleRegExp, $attrRule, $matches)) {
                continue;
            }

            $attrType = isset($matches[1]) ? $matches[1] : null;
            $attrName = isset($matches[2]) ? str_replace('::', ':', $matches[2]) : null;
            $operator = isset($matches[3]) ? $matches[3] : null;
            $value = isset($matches[4]) ? $matches[4] : null;

            // Denied from global: drop the inherited rule and record nothing else for it
            if ($attrType === '-') {
                unset($element->attributes[$attrName]);
                continue;
            }

            $attr = new stdClass();

            // Required
            if ($attrType === '!') {
                $element->attributesRequired[] = $attrName;
                $attr->required = true;
            }

            if ($operator === '=') {
                // Default value
                $element->attributesDefault[$attrName] = $value;
                $attr->defaultValue = $value;
            } elseif ($operator === ':') {
                // Forced value
                $element->attributesForced[$attrName] = $value;
                $attr->forcedValue = $value;
            } elseif ($operator === '<') {
                // Required values
                $attr->validValues = explode('?', $value);
            }

            // Check for attribute patterns
            if ($this->hasWildcards($attrName)) {
                $attr->pattern = $this->patternToRegex($attrName);
                $element->attributePatterns[] = $attr;
            } else {
                $element->attributes[$attrName] = $attr;
            }
        }
    }

    /**
     * Store a finished element rule in the whitelist
     *
     * @param stdClass $element The element rule
     * @param string $elementName The element name (or pattern) of the rule
     * @param string|null $outputName The name given after `/` in the rule, if any
     */
    private function registerElementRule(stdClass $element, $elementName, $outputName)
    {
        // Global rule, store away these for later usage. Only done while no global
        // attributes have been recorded yet.
        if (!$this->globalAttributes && $elementName === '@') {
            $this->globalAttributes = $element->attributes;
        }

        // Handle substitute elements such as b/strong
        if ($outputName) {
            $element->outputName = $elementName;
            $this->elements[$outputName] = $element;
        }

        // Add pattern or exact element
        if ($this->hasWildcards($elementName)) {
            $element->pattern = $this->patternToRegex($elementName);
            $this->elementPatterns[] = $element;
        } else {
            $this->elements[$elementName] = $element;
        }
    }

    /**
     * Check whether a name contains one of the TinyMCE wildcard characters `*`, `?` or `+`
     *
     * @param string|null $name The element or attribute name from a rule
     * @return bool
     */
    private function hasWildcards($name)
    {
        // Cast so that a rule without a name (null) is not handed to preg_match as-is
        return (bool) preg_match('/[*?+]/', (string) $name);
    }
171
172
    /**
     * Look up the whitelist rule that applies to an element tag
     *
     * An exact entry for the tag takes precedence; otherwise the pattern rules are tried in
     * the order they were added and the first one whose regex matches is used.
     *
     * @param string $tag The element tag
     * @return stdClass|null The element rule, or null when no rule applies
     */
    protected function getRuleForElement($tag)
    {
        $rule = null;

        if (isset($this->elements[$tag])) {
            $rule = $this->elements[$tag];
        } else {
            foreach ($this->elementPatterns as $candidate) {
                if (preg_match($candidate->pattern, $tag)) {
                    $rule = $candidate;
                    break;
                }
            }
        }

        return $rule;
    }
189
190
    /**
     * Look up the rule that applies to an attribute name within an element rule
     *
     * An exact entry in the element rule's attributes takes precedence; otherwise the element
     * rule's attribute patterns are tried in order and the first matching one is used.
     *
     * @param stdClass $elementRule
     * @param string $name The attribute name
     * @return stdClass|null The attribute rule, or null when no rule applies
     */
    protected function getRuleForAttribute($elementRule, $name)
    {
        $exactRules = $elementRule->attributes;
        if (isset($exactRules[$name])) {
            return $exactRules[$name];
        }

        $found = null;
        foreach ($elementRule->attributePatterns as $candidate) {
            if (preg_match($candidate->pattern, $name)) {
                $found = $candidate;
                break;
            }
        }

        return $found;
    }
209
210
    /**
     * Given a DOMElement and an element rule, check if that element passes the rule
     *
     * An element fails when there is no rule for it, when the rule lists required attributes
     * and the element carries none of them with a non-empty value, or when the rule asks for
     * empty elements to be removed and the element has no child nodes.
     *
     * @param DOMElement $element The element to check
     * @param stdClass|null $rule The rule to check against
     * @return bool true if the element passes (and so can be kept), false if it fails (and so needs stripping)
     */
    protected function elementMatchesRule($element, $rule = null)
    {
        // If the rule doesn't exist at all, the element isn't allowed
        if (!$rule) {
            return false;
        }

        // If the rule has attributes required, check them to see if this element has at least one
        if ($rule->attributesRequired) {
            $hasMatch = false;

            foreach ($rule->attributesRequired as $attr) {
                // Compare with '' instead of testing truthiness, so that a value of "0"
                // still counts as the attribute being present
                if ((string) $element->getAttribute($attr) !== '') {
                    $hasMatch = true;
                    break;
                }
            }

            if (!$hasMatch) {
                return false;
            }
        }

        // If the rule says to remove empty elements, and this element is empty, remove it
        if ($rule->removeEmpty && !$element->firstChild) {
            return false;
        }

        // No further tests required, element passes
        return true;
    }
247
248
    /**
     * Given a DOMAttr and an attribute rule, check if that attribute passes the rule
     *
     * An attribute fails when there is no rule for it, or when the rule carries a list of
     * valid values and the attribute's value is not exactly one of them.
     *
     * @param DOMAttr $attr - the attribute to check
     * @param stdClass|null $rule - the rule to check against
     * @return bool - true if the attribute passes (and so can be kept), false if it fails (and so needs stripping)
     */
    protected function attributeMatchesRule($attr, $rule = null)
    {
        // If the rule doesn't exist at all, the attribute isn't allowed
        if (!$rule) {
            return false;
        }

        // If the rule has a set of valid values, check them to see if this attribute is one.
        // The comparison is strict: with loose comparison numeric strings such as "1e1" or
        // "10.0" would be accepted for a whitelisted "10".
        if (isset($rule->validValues) && !in_array($attr->value, $rule->validValues, true)) {
            return false;
        }

        // No further tests required, attribute passes
        return true;
    }
269
270
    /**
     * Given an HTMLValue instance, will remove any elements and attributes that are
     * not explicitly included in the whitelist passed to __construct on instance creation
     *
     * Nothing is changed when the whitelist holds no element rules at all. Otherwise every
     * element below <body> is checked: elements that fail their rule are replaced by their
     * children (script and style elements are dropped together with their content), and
     * elements that pass are padded if required, stripped of non-whitelisted attributes and
     * given their default and forced attribute values.
     *
     * @param HTMLValue $html - The HTMLValue to remove any non-whitelisted elements & attributes from
     */
    public function sanitise(HTMLValue $html)
    {
        // Without any element rule there is no whitelist to enforce
        if (empty($this->elements) && empty($this->elementPatterns)) {
            return;
        }

        $doc = $html->getDocument();

        /** @var DOMElement $el */
        foreach ($html->query('//body//*') as $el) {
            $elementRule = $this->getRuleForElement($el->tagName);

            // If this element isn't allowed, strip it
            if (!$this->elementMatchesRule($el, $elementRule)) {
                if ($el->tagName === 'script' || $el->tagName === 'style') {
                    // If it's a script or style, we don't keep contents
                    $el->parentNode->removeChild($el);
                } else {
                    // Otherwise we replace this node with all its children.
                    // First, create a new fragment with all of $el's children moved into it
                    $frag = $doc->createDocumentFragment();
                    while ($el->firstChild) {
                        $frag->appendChild($el->firstChild);
                    }

                    // Then replace $el with the frags contents (which used to be its children)
                    $el->parentNode->replaceChild($frag, $el);
                }

                continue;
            }

            // Otherwise tidy the element.
            // First, if we're supposed to pad & this element is empty, fix that
            if ($elementRule->paddEmpty && !$el->firstChild) {
                $el->nodeValue = '&nbsp;';
            }

            // Then filter out any non-whitelisted attributes. Walk the list backwards so that
            // removing an attribute does not shift the positions still to be visited.
            $attributes = $el->attributes;
            $i = $attributes->length;
            while ($i--) {
                $attr = $attributes->item($i);
                $attributeRule = $this->getRuleForAttribute($elementRule, $attr->name);

                // If this attribute isn't allowed, strip it
                if (!$this->attributeMatchesRule($attr, $attributeRule)) {
                    $el->removeAttributeNode($attr);
                }
            }

            // Then enforce any default attributes. Compare with '' instead of testing
            // truthiness, so that an existing value of "0" is not replaced by the default.
            foreach ($elementRule->attributesDefault as $attr => $default) {
                if ((string) $el->getAttribute($attr) === '') {
                    $el->setAttribute($attr, $default);
                }
            }

            // And any forced attributes
            foreach ($elementRule->attributesForced as $attr => $forced) {
                $el->setAttribute($attr, $forced);
            }
        }
    }
338
}
339