<?php

// Why is this a top-level function? Because PHP 5.2.0 doesn't seem to
// understand how to interpret this filter if it's a static method.
// It's all really silly, but if we go this route it might be reasonable
// to coalesce all of these methods into one.

/**
 * No-op error handler.
 *
 * Installed with set_error_handler() around the CSSTidy parse call in
 * HTMLPurifier_Filter_ExtractStyleBlocks::cleanCSS() so that errors raised
 * while parsing are swallowed. It declares no parameters on purpose: any
 * arguments PHP passes to an error handler are simply ignored.
 *
 * @return void Returns nothing (null). Under set_error_handler()'s contract
 *              only an explicit `false` hands the error back to PHP's
 *              standard handler, so the error stays muted.
 */
function htmlpurifier_filter_extractstyleblocks_muteerrorhandler()
{
}

/**
 * This filter extracts <style> blocks from input HTML, cleans them up
 * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
 * so they can be used elsewhere in the document.
 *
 * @note
 *      See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for
 *      sample usage.
 *
 * @note
 *      This filter can also be used on stylesheets not included in the
 *      document--something purists would probably prefer. Just directly
 *      call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS()
 */
class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
{
    /**
     * Name of this filter.
     * @type string
     */
    public $name = 'ExtractStyleBlocks';

    /**
     * Raw contents of the <style> blocks collected by styleCallback()
     * during the current preFilter() pass; reset after every pass.
     * @type array
     */
    private $_styleMatches = array();

    /**
     * CSS parser/printer. Created in the constructor and replaced in
     * preFilter() whenever Filter.ExtractStyleBlocks.TidyImpl is non-null.
     * @type csstidy
     */
    private $_tidy;

    /**
     * Validator applied to the part of a simple selector that follows '#'.
     * @type HTMLPurifier_AttrDef_HTML_ID
     */
    private $_id_attrdef;

    /**
     * Validator applied to the part of a simple selector that follows '.'.
     * @type HTMLPurifier_AttrDef_CSS_Ident
     */
    private $_class_attrdef;

    /**
     * Validator applied to the part of a simple selector that follows ':';
     * only the pseudo names listed in the constructor are accepted.
     * @type HTMLPurifier_AttrDef_Enum
     */
    private $_enum_attrdef;

    /**
     * Sets up the default CSSTidy instance and the selector validators.
     */
    public function __construct()
    {
        $this->_tidy = new csstidy();
        $this->_tidy->set_cfg('lowercase_s', false);
        // NOTE(review): the `true` argument presumably switches the ID
        // validator into selector mode -- confirm against the AttrDef class.
        $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
        $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
        $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(
            array(
                'first-child',
                'link',
                'visited',
                'active',
                'hover',
                'focus'
            )
        );
    }

    /**
     * Save the contents of CSS blocks to style matches
     * @param array $matches preg_replace style $matches array
     * @return string Always the empty string, so that the matched <style>
     *                block is removed from the HTML being filtered.
     */
    protected function styleCallback($matches)
    {
        $this->_styleMatches[] = $matches[1];
        // Explicit replacement text instead of relying on an implicit null
        // return being cast to ''.
        return '';
    }

    /**
     * Removes inline <style> tags from HTML, saves them for later use
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return string The HTML with <style> blocks removed; if the regular
     *                expression engine fails, the HTML is returned unchanged.
     * @todo Extend to indicate non-text/css style blocks
     */
    public function preFilter($html, $config, $context)
    {
        $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl');
        if ($tidy !== null) {
            $this->_tidy = $tidy;
        }
        // NB: this must be NON-greedy because if we have
        // <style>foo</style> <style>bar</style>
        // we must not grab foo</style> <style>bar
        $stripped = preg_replace_callback(
            '#<style(?:\s.*)?>(.*)<\/style>#isU',
            array($this, 'styleCallback'),
            $html
        );
        if ($stripped === null) {
            // preg_replace_callback() returns null on a PCRE error (for
            // example when the backtrack limit is hit). Keep the document
            // as it was rather than replacing it with null, and drop any
            // blocks collected before the failure so that the extracted
            // styles stay consistent with the untouched HTML.
            $this->_styleMatches = array();
        } else {
            $html = $stripped;
        }
        $style_blocks = $this->_styleMatches;
        $this->_styleMatches = array(); // reset
        // NOTE(review): the blocks are cleaned *after* registration, which
        // presumably relies on register() holding $style_blocks by
        // reference -- confirm against HTMLPurifier_Context.
        $context->register('StyleBlocks', $style_blocks); // $context must not be reused
        if ($this->_tidy) {
            foreach ($style_blocks as &$style) {
                $style = $this->cleanCSS($style, $config, $context);
            }
            unset($style); // break the dangling reference to the last element
        }
        return $html;
    }

    /**
     * Takes CSS (the stuff found in <style>) and cleans it.
     *
     * The CSS is parsed with CSSTidy; every selector is rebuilt from the
     * parts that validate, every declaration is checked against the CSS
     * definition, and the surviving rules are printed back to a string.
     *
     * @warning Requires CSSTidy <http://csstidy.sourceforge.net/>
     * @param string $css CSS styling to clean
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @throws HTMLPurifier_Exception If a selector component is preceded by
     *         a delimiter other than '#', '.' or ':' (broken invariant).
     * @return string Cleaned CSS
     */
    public function cleanCSS($css, $config, $context)
    {
        // prepare scope
        $scope = $config->get('Filter.ExtractStyleBlocks.Scope');
        if ($scope !== null) {
            $scopes = array_map('trim', explode(',', $scope));
        } else {
            $scopes = array();
        }
        // remove comments from CSS
        $css = trim($css);
        if (strncmp('<!--', $css, 4) === 0) {
            $css = substr($css, 4);
        }
        if (strlen($css) > 3 && substr($css, -3) === '-->') {
            $css = substr($css, 0, -3);
        }
        $css = trim($css);
        // Mute errors raised by the parser, and make sure the previous
        // handler is put back even when parse() throws; otherwise the
        // muting handler would stay installed for the rest of the request.
        // (catch + rethrow rather than `finally` to stay compatible with
        // the old PHP versions this file targets.)
        set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
        try {
            $this->_tidy->parse($css);
        } catch (Exception $e) {
            restore_error_handler();
            throw $e;
        }
        restore_error_handler();
        $css_definition = $config->getDefinition('CSS');
        $html_definition = $config->getDefinition('HTML');
        $new_css = array();
        foreach ($this->_tidy->css as $k => $decls) {
            // $decls are all CSS declarations inside an @ selector
            $new_decls = array();
            foreach ($decls as $selector => $style) {
                $selector = trim($selector);
                if ($selector === '') {
                    continue;
                } // should not happen
                // Parse the selector
                // Here is the relevant part of the CSS grammar:
                //
                // ruleset
                //   : selector [ ',' S* selector ]* '{' ...
                // selector
                //   : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
                // combinator
                //   : '+' S*
                //   | '>' S*
                // simple_selector
                //   : element_name [ HASH | class | attrib | pseudo ]*
                //   | [ HASH | class | attrib | pseudo ]+
                // element_name
                //   : IDENT | '*'
                //   ;
                // class
                //   : '.' IDENT
                //   ;
                // attrib
                //   : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
                //     [ IDENT | STRING ] S* ]? ']'
                //   ;
                // pseudo
                //   : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
                //   ;
                //
                // For reference, here are the relevant tokens:
                //
                // HASH         #{name}
                // IDENT        {ident}
                // INCLUDES     ~=
                // DASHMATCH    |=
                // STRING       {string}
                // FUNCTION     {ident}\(
                //
                // And the lexical scanner tokens
                //
                // name         {nmchar}+
                // nmchar       [_a-z0-9-]|{nonascii}|{escape}
                // nonascii     [\240-\377]
                // escape       {unicode}|\\[^\r\n\f0-9a-f]
                // unicode      \\{h}{1,6}(\r\n|[ \t\r\n\f])?
                // ident        -?{nmstart}{nmchar}*
                // nmstart      [_a-z]|{nonascii}|{escape}
                // string       {string1}|{string2}
                // string1      \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
                // string2      \'([^\n\r\f\\']|\\{nl}|{escape})*\'
                //
                // We'll implement a subset (in order to reduce attack
                // surface); in particular:
                //
                //      - No Unicode support
                //      - No escapes support
                //      - No string support (by proxy no attrib support)
                //      - element_name is matched against allowed
                //        elements (some people might find this
                //        annoying...)
                //      - Pseudo-elements one of :first-child, :link,
                //        :visited, :active, :hover, :focus

                // handle ruleset
                $selectors = array_map('trim', explode(',', $selector));
                $new_selectors = array();
                foreach ($selectors as $sel) {
                    // split on +, > and spaces
                    $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
                    // even indices are chunks, odd indices are
                    // delimiters
                    $nsel = null;
                    $delim = null; // guaranteed to be non-null after
                                   // two loop iterations
                    for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
                        $x = $basic_selectors[$i];
                        if ($i % 2) {
                            // delimiter
                            if ($x === ' ') {
                                $delim = ' ';
                            } else {
                                $delim = ' ' . $x . ' ';
                            }
                        } else {
                            // simple selector
                            $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
                            $sdelim = null;
                            $nx = null;
                            for ($j = 0, $cc = count($components); $j < $cc; $j++) {
                                $y = $components[$j];
                                if ($j === 0) {
                                    // element name: '*' or an element known
                                    // to the HTML definition (lowercased)
                                    if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
                                        $nx = $y;
                                    } else {
                                        // $nx stays null; this matters
                                        // if we don't manage to find
                                        // any valid selector content,
                                        // in which case we ignore the
                                        // outer $delim
                                    }
                                } elseif ($j % 2) {
                                    // set delimiter
                                    $sdelim = $y;
                                } else {
                                    // pick the validator that matches the
                                    // delimiter seen just before this part
                                    $attrdef = null;
                                    if ($sdelim === '#') {
                                        $attrdef = $this->_id_attrdef;
                                    } elseif ($sdelim === '.') {
                                        $attrdef = $this->_class_attrdef;
                                    } elseif ($sdelim === ':') {
                                        $attrdef = $this->_enum_attrdef;
                                    } else {
                                        throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
                                    }
                                    $r = $attrdef->validate($y, $config, $context);
                                    if ($r !== false) {
                                        // any result other than `true` is
                                        // used as the replacement value
                                        if ($r !== true) {
                                            $y = $r;
                                        }
                                        if ($nx === null) {
                                            $nx = '';
                                        }
                                        $nx .= $sdelim . $y;
                                    }
                                }
                            }
                            if ($nx !== null) {
                                if ($nsel === null) {
                                    $nsel = $nx;
                                } else {
                                    $nsel .= $delim . $nx;
                                }
                            } else {
                                // delimiters to the left of invalid
                                // basic selector ignored
                            }
                        }
                    }
                    if ($nsel !== null) {
                        if (!empty($scopes)) {
                            // emit one copy of the selector per scope
                            foreach ($scopes as $s) {
                                $new_selectors[] = "$s $nsel";
                            }
                        } else {
                            $new_selectors[] = $nsel;
                        }
                    }
                }
                if (empty($new_selectors)) {
                    // nothing valid left in this ruleset: drop it entirely
                    continue;
                }
                $selector = implode(', ', $new_selectors);
                foreach ($style as $name => $value) {
                    if (!isset($css_definition->info[$name])) {
                        unset($style[$name]);
                        continue;
                    }
                    $def = $css_definition->info[$name];
                    $ret = $def->validate($value, $config, $context);
                    if ($ret === false) {
                        unset($style[$name]);
                    } else {
                        $style[$name] = $ret;
                    }
                }
                $new_decls[$selector] = $style;
            }
            $new_css[$k] = $new_decls;
        }
        // remove stuff that shouldn't be used, could be reenabled
        // after security risks are analyzed
        $this->_tidy->css = $new_css;
        $this->_tidy->import = array();
        $this->_tidy->charset = null;
        $this->_tidy->namespace = null;
        $css = $this->_tidy->print->plain();
        // we are going to escape any special characters <>& to ensure
        // that no funny business occurs (i.e. </style> in a font-family prop).
        if ($config->get('Filter.ExtractStyleBlocks.Escaping')) {
            $css = str_replace(
                array('<', '>', '&'),
                array('\3C ', '\3E ', '\26 '),
                $css
            );
        }
        return $css;
    }
}

// vim: et sw=4 sts=4
// The issue could also be caused by a filter entry in the build configuration.
// If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"],
// you can move it to the dependency path list instead. For further information see
// https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths