1
|
|
|
<?php
|
2
|
|
|
/**
|
3
|
|
|
* This file is part of the O2System Framework package.
|
4
|
|
|
*
|
5
|
|
|
* For the full copyright and license information, please view the LICENSE
|
6
|
|
|
* file that was distributed with this source code.
|
7
|
|
|
*
|
8
|
|
|
* @author Steeve Andrian Salim
|
9
|
|
|
* @copyright Copyright (c) Steeve Andrian Salim
|
10
|
|
|
*/
|
11
|
|
|
|
12
|
|
|
// ------------------------------------------------------------------------
|
13
|
|
|
|
14
|
|
|
namespace O2System\Security\Filters;
|
15
|
|
|
|
16
|
|
|
/**
|
17
|
|
|
* Class Xss
|
18
|
|
|
*
|
19
|
|
|
* @package O2System\Security\Filters
|
20
|
|
|
*/
|
21
|
|
|
class Xss
|
22
|
|
|
{
|
23
|
|
|
/**
|
24
|
|
|
* Clean
|
25
|
|
|
*
|
26
|
|
|
* @param string $string
|
27
|
|
|
* @param boolean $isImage
|
28
|
|
|
*
|
29
|
|
|
* @return string
|
30
|
|
|
*/
|
31
|
|
|
public static function clean($string, $isImage = false)
|
32
|
|
|
{
|
33
|
|
|
// Is the string an array?
|
34
|
|
|
if (is_array($string)) {
|
|
|
|
|
35
|
|
|
while (list($key) = each($string)) {
|
|
|
|
|
36
|
|
|
$string[ $key ] = self::clean($string[ $key ]);
|
37
|
|
|
}
|
38
|
|
|
|
39
|
|
|
return $string;
|
40
|
|
|
}
|
41
|
|
|
|
42
|
|
|
// Remove Invisible Characters
|
43
|
|
|
$string = remove_invisible_characters($string);
|
44
|
|
|
|
45
|
|
|
/*
|
46
|
|
|
* URL Decode
|
47
|
|
|
*
|
48
|
|
|
* Just in case stuff like this is submitted:
|
49
|
|
|
*
|
50
|
|
|
* <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
|
51
|
|
|
*
|
52
|
|
|
* Note: Use rawurldecode() so it does not remove plus signs
|
53
|
|
|
*/
|
54
|
|
|
do {
|
55
|
|
|
$string = rawurldecode($string);
|
56
|
|
|
} while (preg_match('/%[0-9a-f]{2,}/i', $string));
|
57
|
|
|
|
58
|
|
|
/*
|
59
|
|
|
* Convert character entities to ASCII
|
60
|
|
|
*
|
61
|
|
|
* This permits our tests below to work reliably.
|
62
|
|
|
* We only convert entities that are within tags since
|
63
|
|
|
* these are the ones that will pose security problems.
|
64
|
|
|
*/
|
65
|
|
|
$string = preg_replace_callback(
|
66
|
|
|
"/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si",
|
67
|
|
|
[self::class, 'convertAttribute'],
|
68
|
|
|
$string
|
69
|
|
|
);
|
70
|
|
|
|
71
|
|
|
$string = preg_replace_callback('/<\w+.*/si', [self::class, 'decodeEntity'], $string);
|
72
|
|
|
|
73
|
|
|
// Remove Invisible Characters Again!
|
74
|
|
|
$string = remove_invisible_characters($string);
|
75
|
|
|
|
76
|
|
|
/*
|
77
|
|
|
* Convert all tabs to spaces
|
78
|
|
|
*
|
79
|
|
|
* This prevents strings like this: ja vascript
|
80
|
|
|
* NOTE: we deal with spaces between characters later.
|
81
|
|
|
* NOTE: preg_replace was found to be amazingly slow here on
|
82
|
|
|
* large blocks of data, so we use str_replace.
|
83
|
|
|
*/
|
84
|
|
|
$string = str_replace("\t", ' ', $string);
|
85
|
|
|
|
86
|
|
|
// Capture converted string for later comparison
|
87
|
|
|
$convertedString = $string;
|
88
|
|
|
|
89
|
|
|
// Remove Strings that are never allowed
|
90
|
|
|
$string = self::doNeverAllowed($string);
|
91
|
|
|
|
92
|
|
|
/*
|
93
|
|
|
* Makes PHP tags safe
|
94
|
|
|
*
|
95
|
|
|
* Note: XML tags are inadvertently replaced too:
|
96
|
|
|
*
|
97
|
|
|
* <?xml
|
98
|
|
|
*
|
99
|
|
|
* But it doesn't seem to pose a problem.
|
100
|
|
|
*/
|
101
|
|
|
if ($isImage === true) {
|
102
|
|
|
// Images have a tendency to have the PHP short opening and
|
103
|
|
|
// closing tags every so often so we skip those and only
|
104
|
|
|
// do the long opening tags.
|
105
|
|
|
$string = preg_replace('/<\?(php)/i', '<?\\1', $string);
|
106
|
|
|
} else {
|
107
|
|
|
$string = str_replace(['<?', '?' . '>'], ['<?', '?>'], $string);
|
108
|
|
|
}
|
109
|
|
|
|
110
|
|
|
/*
|
111
|
|
|
* Compact any exploded words
|
112
|
|
|
*
|
113
|
|
|
* This corrects words like: j a v a s c r i p t
|
114
|
|
|
* These words are compacted back to their correct state.
|
115
|
|
|
*/
|
116
|
|
|
$words = [
|
117
|
|
|
'javascript',
|
118
|
|
|
'expression',
|
119
|
|
|
'vbscript',
|
120
|
|
|
'jscript',
|
121
|
|
|
'wscript',
|
122
|
|
|
'vbs',
|
123
|
|
|
'script',
|
124
|
|
|
'base64',
|
125
|
|
|
'applet',
|
126
|
|
|
'alert',
|
127
|
|
|
'document',
|
128
|
|
|
'write',
|
129
|
|
|
'cookie',
|
130
|
|
|
'window',
|
131
|
|
|
'confirm',
|
132
|
|
|
'prompt',
|
133
|
|
|
'eval',
|
134
|
|
|
];
|
135
|
|
|
|
136
|
|
|
foreach ($words as $word) {
|
137
|
|
|
$word = implode('\s*', str_split($word)) . '\s*';
|
138
|
|
|
|
139
|
|
|
// We only want to do this when it is followed by a non-word character
|
140
|
|
|
// That way valid stuff like "dealer to" does not become "dealerto"
|
141
|
|
|
$string = preg_replace_callback(
|
142
|
|
|
'#(' . substr($word, 0, -3) . ')(\W)#is',
|
143
|
|
|
[self::class, 'compactExplodedWords'],
|
144
|
|
|
$string
|
145
|
|
|
);
|
146
|
|
|
}
|
147
|
|
|
|
148
|
|
|
/*
|
149
|
|
|
* Remove disallowed Javascript in links or img tags
|
150
|
|
|
* We used to do some version comparisons and use of stripos(),
|
151
|
|
|
* but it is dog slow compared to these simplified non-capturing
|
152
|
|
|
* preg_match(), especially if the pattern exists in the string
|
153
|
|
|
*
|
154
|
|
|
* Note: It was reported that not only space characters, but all in
|
155
|
|
|
* the following pattern can be parsed as separators between a tag name
|
156
|
|
|
* and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
|
157
|
|
|
* ... however, remove_invisible_characters() above already strips the
|
158
|
|
|
* hex-encoded ones, so we'll skip them below.
|
159
|
|
|
*/
|
160
|
|
|
do {
|
161
|
|
|
$original = $string;
|
162
|
|
|
if (preg_match('/<a/i', $string)) {
|
163
|
|
|
$string = preg_replace_callback(
|
164
|
|
|
'#<a[^a-z0-9>]+([^>]*?)(?:>|$)#si',
|
165
|
|
|
[self::class, 'jsLinkRemoval'],
|
166
|
|
|
$string
|
167
|
|
|
);
|
168
|
|
|
}
|
169
|
|
|
if (preg_match('/<img/i', $string)) {
|
170
|
|
|
$string = preg_replace_callback(
|
171
|
|
|
'#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si',
|
172
|
|
|
[self::class, 'jsImgRemoval'],
|
173
|
|
|
$string
|
174
|
|
|
);
|
175
|
|
|
}
|
176
|
|
|
if (preg_match('/script|xss/i', $string)) {
|
177
|
|
|
$string = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $string);
|
178
|
|
|
}
|
179
|
|
|
} while ($original !== $string);
|
180
|
|
|
unset($original);
|
181
|
|
|
|
182
|
|
|
/*
|
183
|
|
|
* Sanitize naughty HTML elements
|
184
|
|
|
*
|
185
|
|
|
* If a tag containing any of the words in the list
|
186
|
|
|
* below is found, the tag gets converted to entities.
|
187
|
|
|
*
|
188
|
|
|
* So this: <blink>
|
189
|
|
|
* Becomes: <blink>
|
190
|
|
|
*/
|
191
|
|
|
$pattern = '#'
|
192
|
|
|
. '<((?<slash>/*\s*)(?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)'
|
193
|
|
|
// tag start and name, followed by a non-tag character
|
194
|
|
|
. '[^\s\042\047a-z0-9>/=]*'
|
195
|
|
|
// a valid attribute character immediately after the tag would count as a separator
|
196
|
|
|
// optional attributes
|
197
|
|
|
. '(?<attributes>(?:[\s\042\047/=]*'
|
198
|
|
|
// non-attribute characters, excluding > (tag close) for obvious reasons
|
199
|
|
|
. '[^\s\042\047>/=]+'
|
200
|
|
|
// attribute characters
|
201
|
|
|
// optional attribute-value
|
202
|
|
|
. '(?:\s*='
|
203
|
|
|
// attribute-value separator
|
204
|
|
|
. '(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))'
|
205
|
|
|
// single, double or non-quoted value
|
206
|
|
|
. ')?'
|
207
|
|
|
// end optional attribute-value group
|
208
|
|
|
. ')*)'
|
209
|
|
|
// end optional attributes group
|
210
|
|
|
. '[^>]*)(?<closeTag>\>)?#isS';
|
211
|
|
|
// Note: It would be nice to optimize this for speed, BUT
|
212
|
|
|
// only matching the naughty elements here results in
|
213
|
|
|
// false positives and in turn - vulnerabilities!
|
214
|
|
|
do {
|
215
|
|
|
$oldString = $string;
|
216
|
|
|
$string = preg_replace_callback($pattern, [self::class, 'sanitizeNaughtyHTML'], $string);
|
217
|
|
|
} while ($oldString !== $string);
|
218
|
|
|
|
219
|
|
|
unset($oldString);
|
220
|
|
|
|
221
|
|
|
/*
|
222
|
|
|
* Sanitize naughty scripting elements
|
223
|
|
|
*
|
224
|
|
|
* Similar to above, only instead of looking for
|
225
|
|
|
* tags it looks for PHP and JavaScript commands
|
226
|
|
|
* that are disallowed. Rather than removing the
|
227
|
|
|
* code, it simply converts the parenthesis to entities
|
228
|
|
|
* rendering the code un-executable.
|
229
|
|
|
*
|
230
|
|
|
* For example: eval('some code')
|
231
|
|
|
* Becomes: eval('some code')
|
232
|
|
|
*/
|
233
|
|
|
$string = preg_replace(
|
234
|
|
|
'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
|
235
|
|
|
'\\1\\2(\\3)',
|
236
|
|
|
$string
|
237
|
|
|
);
|
238
|
|
|
|
239
|
|
|
// Final clean up
|
240
|
|
|
// This adds a bit of extra precaution in case
|
241
|
|
|
// something got through the above filters
|
242
|
|
|
$string = self::doNeverAllowed($string);
|
243
|
|
|
|
244
|
|
|
/*
|
245
|
|
|
* Images are Handled in a Special Way
|
246
|
|
|
* - Essentially, we want to know that after all of the character
|
247
|
|
|
* conversion is done whether any unwanted, likely XSS, code was found.
|
248
|
|
|
* If not, we return TRUE, as the image is clean.
|
249
|
|
|
* However, if the string post-conversion does not matched the
|
250
|
|
|
* string post-removal of XSS, then it fails, as there was unwanted XSS
|
251
|
|
|
* code found and removed/changed during processing.
|
252
|
|
|
*/
|
253
|
|
|
if ($isImage === true) {
|
254
|
|
|
return ($string === $convertedString);
|
|
|
|
|
255
|
|
|
}
|
256
|
|
|
|
257
|
|
|
return $string;
|
258
|
|
|
}
|
259
|
|
|
|
260
|
|
|
/**
|
261
|
|
|
* Do Never Allowed
|
262
|
|
|
*
|
263
|
|
|
* @used-by XSS::clean()
|
264
|
|
|
*
|
265
|
|
|
* @param string
|
266
|
|
|
*
|
267
|
|
|
* @return string
|
268
|
|
|
*/
|
269
|
|
|
protected static function doNeverAllowed($string)
|
270
|
|
|
{
|
271
|
|
|
$string = str_replace(
|
272
|
|
|
array_keys(self::getConfig('never_allowed_strings')),
|
|
|
|
|
273
|
|
|
self::getConfig('never_allowed_strings'),
|
274
|
|
|
$string
|
275
|
|
|
);
|
276
|
|
|
|
277
|
|
|
foreach (self::getConfig('never_allowed_regex') as $regex) {
|
278
|
|
|
$string = preg_replace('#' . $regex . '#is', '[removed]', $string);
|
279
|
|
|
}
|
280
|
|
|
|
281
|
|
|
return $string;
|
282
|
|
|
}
|
283
|
|
|
|
284
|
|
|
// --------------------------------------------------------------------------------------
|
285
|
|
|
|
286
|
|
|
protected function getConfig($index)
|
287
|
|
|
{
|
288
|
|
|
static $config;
|
289
|
|
|
|
290
|
|
|
if (empty($config)) {
|
291
|
|
|
$config = require('../Config/Xss.php');
|
292
|
|
|
}
|
293
|
|
|
|
294
|
|
|
return $config[ $index ];
|
295
|
|
|
}
|
296
|
|
|
|
297
|
|
|
// --------------------------------------------------------------------
|
298
|
|
|
|
299
|
|
|
/**
|
300
|
|
|
* Compact Exploded Words
|
301
|
|
|
*
|
302
|
|
|
* Callback method for xss_clean() to remove whitespace from
|
303
|
|
|
* things like 'j a v a s c r i p t'.
|
304
|
|
|
*
|
305
|
|
|
* @used-by XSS::clean()
|
306
|
|
|
*
|
307
|
|
|
* @param array $matches
|
308
|
|
|
*
|
309
|
|
|
* @return string
|
310
|
|
|
*/
|
311
|
|
|
protected static function compactExplodedWords($matches)
|
312
|
|
|
{
|
313
|
|
|
return preg_replace('/\s+/s', '', $matches[ 1 ]) . $matches[ 2 ];
|
314
|
|
|
}
|
315
|
|
|
|
316
|
|
|
// --------------------------------------------------------------------
|
317
|
|
|
|
318
|
|
|
/**
|
319
|
|
|
* Sanitize Naughty HTML
|
320
|
|
|
*
|
321
|
|
|
* Callback method for xss_clean() to remove naughty HTML elements.
|
322
|
|
|
*
|
323
|
|
|
* @used-by XSS::clean()
|
324
|
|
|
*
|
325
|
|
|
* @param array $matches
|
326
|
|
|
*
|
327
|
|
|
* @return string
|
328
|
|
|
*/
|
329
|
|
|
protected static function sanitizeNaughtyHTML($matches)
|
330
|
|
|
{
|
331
|
|
|
// First, escape unclosed tags
|
332
|
|
|
if (empty($matches[ 'closeTag' ])) {
|
333
|
|
|
return '<' . $matches[ 1 ];
|
334
|
|
|
} // Is the element that we caught naughty? If so, escape it
|
335
|
|
|
elseif (in_array(strtolower($matches[ 'tagName' ]), self::getConfig('naughty_tags'), true)) {
|
|
|
|
|
336
|
|
|
return '<' . $matches[ 1 ] . '>';
|
337
|
|
|
} // For other tags, see if their attributes are "evil" and strip those
|
338
|
|
|
elseif (isset($matches[ 'attributes' ])) {
|
339
|
|
|
// We'll store the already fitlered attributes here
|
340
|
|
|
$attributes = [];
|
341
|
|
|
|
342
|
|
|
// Attribute-catching pattern
|
343
|
|
|
$attributesPattern = '#'
|
344
|
|
|
. '(?<name>[^\s\042\047>/=]+)'
|
345
|
|
|
// attribute characters
|
346
|
|
|
// optional attribute-value
|
347
|
|
|
. '(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))'
|
348
|
|
|
// attribute-value separator
|
349
|
|
|
. '#i';
|
350
|
|
|
|
351
|
|
|
// Blacklist pattern for evil attribute names
|
352
|
|
|
$is_evil_pattern = '#^(' . implode('|', self::getConfig('evil_attributes')) . ')$#i';
|
353
|
|
|
|
354
|
|
|
// Each iteration filters a single attribute
|
355
|
|
|
do {
|
356
|
|
|
// Strip any non-alpha characters that may preceed an attribute.
|
357
|
|
|
// Browsers often parse these incorrectly and that has been a
|
358
|
|
|
// of numerous XSS issues we've had.
|
359
|
|
|
$matches[ 'attributes' ] = preg_replace('#^[^a-z]+#i', '', $matches[ 'attributes' ]);
|
360
|
|
|
|
361
|
|
|
if ( ! preg_match($attributesPattern, $matches[ 'attributes' ], $attribute, PREG_OFFSET_CAPTURE)) {
|
362
|
|
|
// No (valid) attribute found? Discard everything else inside the tag
|
363
|
|
|
break;
|
364
|
|
|
}
|
365
|
|
|
|
366
|
|
|
if (
|
367
|
|
|
// Is it indeed an "evil" attribute?
|
368
|
|
|
preg_match($is_evil_pattern, $attribute[ 'name' ][ 0 ])
|
369
|
|
|
// Or does it have an equals sign, but no value and not quoted? Strip that too!
|
370
|
|
|
OR (trim($attribute[ 'value' ][ 0 ]) === '')
|
371
|
|
|
) {
|
372
|
|
|
$attributes[] = 'xss=removed';
|
373
|
|
|
} else {
|
374
|
|
|
$attributes[] = $attribute[ 0 ][ 0 ];
|
375
|
|
|
}
|
376
|
|
|
|
377
|
|
|
$matches[ 'attributes' ] = substr(
|
378
|
|
|
$matches[ 'attributes' ],
|
379
|
|
|
$attribute[ 0 ][ 1 ] + strlen($attribute[ 0 ][ 0 ])
|
380
|
|
|
);
|
381
|
|
|
} while ($matches[ 'attributes' ] !== '');
|
382
|
|
|
$attributes = empty($attributes)
|
383
|
|
|
? ''
|
384
|
|
|
: ' ' . implode(' ', $attributes);
|
385
|
|
|
|
386
|
|
|
return '<' . $matches[ 'slash' ] . $matches[ 'tagName' ] . $attributes . '>';
|
387
|
|
|
}
|
388
|
|
|
|
389
|
|
|
return $matches[ 0 ];
|
390
|
|
|
}
|
391
|
|
|
|
392
|
|
|
// --------------------------------------------------------------------
|
393
|
|
|
|
394
|
|
|
/**
|
395
|
|
|
* JS Link Removal
|
396
|
|
|
*
|
397
|
|
|
* Callback method for xss_clean() to sanitize links.
|
398
|
|
|
*
|
399
|
|
|
* This limits the PCRE backtracks, making it more performance friendly
|
400
|
|
|
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
|
401
|
|
|
* PHP 5.2+ on link-heavy strings.
|
402
|
|
|
*
|
403
|
|
|
* @used-by XSS::clean()
|
404
|
|
|
*
|
405
|
|
|
* @param array $match
|
406
|
|
|
*
|
407
|
|
|
* @return string
|
408
|
|
|
*/
|
409
|
|
|
protected static function jsLinkRemoval($match)
|
410
|
|
|
{
|
411
|
|
|
return str_replace(
|
412
|
|
|
$match[ 1 ],
|
413
|
|
|
preg_replace(
|
414
|
|
|
'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
|
415
|
|
|
'',
|
416
|
|
|
self::filterAttributes(str_replace(['<', '>'], '', $match[ 1 ]))
|
417
|
|
|
),
|
418
|
|
|
$match[ 0 ]
|
419
|
|
|
);
|
420
|
|
|
}
|
421
|
|
|
|
422
|
|
|
// --------------------------------------------------------------------
|
423
|
|
|
|
424
|
|
|
/**
|
425
|
|
|
* Filter Attributes
|
426
|
|
|
*
|
427
|
|
|
* Filters tag attributes for consistency and safety.
|
428
|
|
|
*
|
429
|
|
|
* @used-by Security::jsImgRemoval()
|
430
|
|
|
* @used-by Security::jsLinkRemoval()
|
431
|
|
|
*
|
432
|
|
|
* @param string $str
|
433
|
|
|
*
|
434
|
|
|
* @return string
|
435
|
|
|
*/
|
436
|
|
|
protected static function filterAttributes($str)
|
437
|
|
|
{
|
438
|
|
|
$out = '';
|
439
|
|
|
if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches)) {
|
440
|
|
|
foreach ($matches[ 0 ] as $match) {
|
441
|
|
|
$out .= preg_replace('#/\*.*?\*/#s', '', $match);
|
442
|
|
|
}
|
443
|
|
|
}
|
444
|
|
|
|
445
|
|
|
return $out;
|
446
|
|
|
}
|
447
|
|
|
|
448
|
|
|
// --------------------------------------------------------------------
|
449
|
|
|
|
450
|
|
|
/**
|
451
|
|
|
* JS Image Removal
|
452
|
|
|
*
|
453
|
|
|
* Callback method for xss_clean() to sanitize image tags.
|
454
|
|
|
*
|
455
|
|
|
* This limits the PCRE backtracks, making it more performance friendly
|
456
|
|
|
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
|
457
|
|
|
* PHP 5.2+ on image tag heavy strings.
|
458
|
|
|
*
|
459
|
|
|
* @used-by XSS::clean()
|
460
|
|
|
*
|
461
|
|
|
* @param array $match
|
462
|
|
|
*
|
463
|
|
|
* @return string
|
464
|
|
|
*/
|
465
|
|
|
protected static function jsImgRemoval($match)
|
466
|
|
|
{
|
467
|
|
|
return str_replace(
|
468
|
|
|
$match[ 1 ],
|
469
|
|
|
preg_replace(
|
470
|
|
|
'#src=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
|
471
|
|
|
'',
|
472
|
|
|
self::filterAttributes(str_replace(['<', '>'], '', $match[ 1 ]))
|
473
|
|
|
),
|
474
|
|
|
$match[ 0 ]
|
475
|
|
|
);
|
476
|
|
|
}
|
477
|
|
|
|
478
|
|
|
// --------------------------------------------------------------------
|
479
|
|
|
|
480
|
|
|
/**
|
481
|
|
|
* Attribute Conversion
|
482
|
|
|
*
|
483
|
|
|
* @used-by XSS::clean()
|
484
|
|
|
*
|
485
|
|
|
* @param array $match
|
486
|
|
|
*
|
487
|
|
|
* @return string
|
488
|
|
|
*/
|
489
|
|
|
protected static function convertAttribute($match)
|
490
|
|
|
{
|
491
|
|
|
return str_replace(['>', '<', '\\'], ['>', '<', '\\\\'], $match[ 0 ]);
|
492
|
|
|
}
|
493
|
|
|
|
494
|
|
|
// ------------------------------------------------------------------------
|
495
|
|
|
|
496
|
|
|
/**
|
497
|
|
|
* HTML Entity Decode Callback
|
498
|
|
|
*
|
499
|
|
|
* @used-by XSS::clean()
|
500
|
|
|
*
|
501
|
|
|
* @param array $match
|
502
|
|
|
*
|
503
|
|
|
* @return string
|
504
|
|
|
*/
|
505
|
|
|
protected static function decodeEntity($match)
|
506
|
|
|
{
|
507
|
|
|
// Protect GET variables in URLs
|
508
|
|
|
// 901119URL5918AMP18930PROTECT8198
|
509
|
|
|
$match = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', self::token . '\\1=\\2', $match[ 0 ]);
|
|
|
|
|
510
|
|
|
|
511
|
|
|
$charset = 'UTF-8';
|
512
|
|
|
if (function_exists('config')) {
|
513
|
|
|
$charset = config()->getItem('charset');
|
514
|
|
|
}
|
515
|
|
|
|
516
|
|
|
// Decode, then un-protect URL GET vars
|
517
|
|
|
return str_replace(
|
518
|
|
|
self::token,
|
519
|
|
|
'&',
|
520
|
|
|
self::entityDecode($match, $charset)
|
521
|
|
|
);
|
522
|
|
|
}
|
523
|
|
|
|
524
|
|
|
// --------------------------------------------------------------------
|
525
|
|
|
|
526
|
|
|
/**
|
527
|
|
|
* HTML Entities Decode
|
528
|
|
|
*
|
529
|
|
|
* A replacement for html_entity_decode()
|
530
|
|
|
*
|
531
|
|
|
* The reason we are not using html_entity_decode() by itself is because
|
532
|
|
|
* while it is not technically correct to leave out the semicolon
|
533
|
|
|
* at the end of an entity most browsers will still interpret the entity
|
534
|
|
|
* correctly. html_entity_decode() does not convert entities without
|
535
|
|
|
* semicolons, so we are left with our own little solution here. Bummer.
|
536
|
|
|
*
|
537
|
|
|
* @link http://php.net/html-entity-decode
|
538
|
|
|
*
|
539
|
|
|
* @param string $string Input
|
540
|
|
|
* @param string $charset Character set
|
541
|
|
|
*
|
542
|
|
|
* @return string
|
543
|
|
|
*/
|
544
|
|
|
protected static function entityDecode($string, $charset = null)
|
545
|
|
|
{
|
546
|
|
|
if (strpos($string, '&') === false) {
|
547
|
|
|
return $string;
|
548
|
|
|
}
|
549
|
|
|
|
550
|
|
|
static $entities;
|
551
|
|
|
|
552
|
|
|
isset($charset) || $charset = 'UTF-8';
|
553
|
|
|
|
554
|
|
|
if (function_exists('config')) {
|
555
|
|
|
$charset = config()->getItem('charset');
|
556
|
|
|
}
|
557
|
|
|
|
558
|
|
|
$flag = ENT_COMPAT | ENT_HTML5;
|
559
|
|
|
|
560
|
|
|
do {
|
561
|
|
|
$comparissonString = $string;
|
562
|
|
|
|
563
|
|
|
// Decode standard entities, avoiding false positives
|
564
|
|
|
if ($c = preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $string, $matches)) {
|
565
|
|
|
if ( ! isset($entities)) {
|
566
|
|
|
$entities = array_map(
|
567
|
|
|
'strtolower',
|
568
|
|
|
get_html_translation_table(HTML_ENTITIES, $flag)
|
569
|
|
|
);
|
570
|
|
|
}
|
571
|
|
|
|
572
|
|
|
$replace = [];
|
573
|
|
|
$matches = array_unique(array_map('strtolower', $matches[ 0 ]));
|
574
|
|
|
for ($i = 0; $i < $c; $i++) {
|
575
|
|
|
if (($char = array_search($matches[ $i ] . ';', $entities, true)) !== false) {
|
576
|
|
|
$replace[ $matches[ $i ] ] = $char;
|
577
|
|
|
}
|
578
|
|
|
}
|
579
|
|
|
|
580
|
|
|
$string = str_ireplace(array_keys($replace), array_values($replace), $string);
|
581
|
|
|
}
|
582
|
|
|
|
583
|
|
|
// Decode numeric & UTF16 two byte entities
|
584
|
|
|
$string = html_entity_decode(
|
585
|
|
|
preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $string),
|
586
|
|
|
$flag,
|
587
|
|
|
$charset
|
588
|
|
|
);
|
589
|
|
|
} while ($comparissonString !== $string);
|
590
|
|
|
|
591
|
|
|
return $string;
|
592
|
|
|
}
|
593
|
|
|
|
594
|
|
|
} |