1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** @noinspection ReturnTypeCanBeDeclaredInspection */ |
4
|
|
|
|
5
|
|
|
declare(strict_types=1); |
6
|
|
|
|
7
|
|
|
namespace voku\helper; |
8
|
|
|
|
9
|
|
|
use const ENT_DISALLOWED; |
10
|
|
|
use const ENT_HTML5; |
11
|
|
|
use const ENT_QUOTES; |
12
|
|
|
use const ENT_SUBSTITUTE; |
13
|
|
|
use const HTML_ENTITIES; |
14
|
|
|
|
15
|
|
|
/** |
16
|
|
|
* AntiXSS |
17
|
|
|
* |
18
|
|
|
* ported from "CodeIgniter" |
19
|
|
|
* |
20
|
|
|
* @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/) |
21
|
|
|
* @copyright Copyright (c) 2014 - 2015, British Columbia Institute of Technology (http://bcit.ca/) |
22
|
|
|
* @copyright Copyright (c) 2015 - 2020, Lars Moelleken (https://moelleken.org/) |
23
|
|
|
* @license http://opensource.org/licenses/MIT MIT License |
24
|
|
|
*/ |
25
|
|
|
final class AntiXSS |
26
|
|
|
{ |
27
|
|
|
const VOKU_ANTI_XSS_GT = 'voku::anti-xss::gt'; |
28
|
|
|
|
29
|
|
|
const VOKU_ANTI_XSS_LT = 'voku::anti-xss::lt'; |
30
|
|
|
|
31
|
|
|
const VOKU_ANTI_XSS_STYLE = 'voku::anti-xss::STYLE'; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* List of never allowed regex replacements. |
35
|
|
|
* |
36
|
|
|
* @var string[] |
37
|
|
|
*/ |
38
|
|
|
private $_never_allowed_regex = []; |
39
|
|
|
|
40
|
|
|
/** |
41
|
|
|
* List of HTML tags that will not be closed automatically.
42
|
|
|
* |
43
|
|
|
* @var string[] |
44
|
|
|
*/ |
45
|
|
|
private $_do_not_close_html_tags = []; |
46
|
|
|
|
47
|
|
|
/** |
48
|
|
|
* List of never allowed call statements. |
49
|
|
|
* |
50
|
|
|
* @var string[] |
51
|
|
|
*/ |
52
|
|
|
private static $_never_allowed_call = [ |
53
|
|
|
// default javascript |
54
|
|
|
'javascript', |
55
|
|
|
// Java: jar-protocol is an XSS hazard |
56
|
|
|
'jar', |
57
|
|
|
// Mac (will not run the script, but open it in AppleScript Editor) |
58
|
|
|
'applescript', |
59
|
|
|
// IE: https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#VBscript_in_an_image |
60
|
|
|
'vbscript', |
61
|
|
|
'vbs', |
62
|
|
|
// IE, surprise! |
63
|
|
|
'wscript', |
64
|
|
|
// IE |
65
|
|
|
'jscript', |
66
|
|
|
// https://html5sec.org/#behavior |
67
|
|
|
'behavior', |
68
|
|
|
// old Netscape |
69
|
|
|
'mocha', |
70
|
|
|
// old Netscape |
71
|
|
|
'livescript', |
72
|
|
|
// default view source |
73
|
|
|
'view-source', |
74
|
|
|
]; |
75
|
|
|
|
76
|
|
|
/** |
77
|
|
|
* @var string[] |
78
|
|
|
*/ |
79
|
|
|
private $_never_allowed_str_afterwards = [ |
80
|
|
|
'<script>', |
81
|
|
|
'</script>', |
82
|
|
|
]; |
83
|
|
|
|
84
|
|
|
/** |
85
|
|
|
* List of never allowed event-handler names ("on..."), removed in the final clean-up pass.
86
|
|
|
* |
87
|
|
|
* @var string[] |
88
|
|
|
*/ |
89
|
|
|
private $_never_allowed_on_events_afterwards = [ |
90
|
|
|
'onAbort', |
91
|
|
|
'onActivate', |
92
|
|
|
'onAttribute', |
93
|
|
|
'onAfterPrint', |
94
|
|
|
'onAfterScriptExecute', |
95
|
|
|
'onAfterUpdate', |
96
|
|
|
'onAnimationCancel', |
97
|
|
|
'onAnimationEnd', |
98
|
|
|
'onAnimationIteration', |
99
|
|
|
'onAnimationStart', |
100
|
|
|
'onAriaRequest', |
101
|
|
|
'onAutoComplete', |
102
|
|
|
'onAutoCompleteError', |
103
|
|
|
'onAuxClick', |
104
|
|
|
'onBeforeActivate', |
105
|
|
|
'onBeforeCopy', |
106
|
|
|
'onBeforeCut', |
107
|
|
|
'onBeforeDeactivate', |
108
|
|
|
'onBeforeEditFocus', |
109
|
|
|
'onBeforePaste', |
110
|
|
|
'onBeforePrint', |
111
|
|
|
'onBeforeScriptExecute', |
112
|
|
|
'onBeforeUnload', |
113
|
|
|
'onBeforeUpdate', |
114
|
|
|
'onBegin', |
115
|
|
|
'onBlur', |
116
|
|
|
'onBounce', |
117
|
|
|
'onCancel', |
118
|
|
|
'onCanPlay', |
119
|
|
|
'onCanPlayThrough', |
120
|
|
|
'onCellChange', |
121
|
|
|
'onChange', |
122
|
|
|
'onClick', |
123
|
|
|
'onClose', |
124
|
|
|
'onCommand', |
125
|
|
|
'onCompassNeedsCalibration', |
126
|
|
|
'onContextMenu', |
127
|
|
|
'onControlSelect', |
128
|
|
|
'onCopy', |
129
|
|
|
'onCueChange', |
130
|
|
|
'onCut', |
131
|
|
|
'onDataAvailable', |
132
|
|
|
'onDataSetChanged', |
133
|
|
|
'onDataSetComplete', |
134
|
|
|
'onDblClick', |
135
|
|
|
'onDeactivate', |
136
|
|
|
'onDeviceLight', |
137
|
|
|
'onDeviceMotion', |
138
|
|
|
'onDeviceOrientation', |
139
|
|
|
'onDeviceProximity', |
140
|
|
|
'onDrag', |
141
|
|
|
'onDragDrop', |
142
|
|
|
'onDragEnd', |
143
|
|
|
'onDragEnter', |
144
|
|
|
'onDragLeave', |
145
|
|
|
'onDragOver', |
146
|
|
|
'onDragStart', |
147
|
|
|
'onDrop', |
148
|
|
|
'onDurationChange', |
149
|
|
|
'onEmptied', |
150
|
|
|
'onEnd', |
151
|
|
|
'onEnded', |
152
|
|
|
'onError', |
153
|
|
|
'onErrorUpdate', |
154
|
|
|
'onExit', |
155
|
|
|
'onFilterChange', |
156
|
|
|
'onFinish', |
157
|
|
|
'onFocus', |
158
|
|
|
'onFocusIn', |
159
|
|
|
'onFocusOut', |
160
|
|
|
'onFormChange', |
161
|
|
|
'onFormInput', |
162
|
|
|
'onFullScreenChange', |
163
|
|
|
'onFullScreenError', |
164
|
|
|
'onGotPointerCapture', |
165
|
|
|
'onHashChange', |
166
|
|
|
'onHelp', |
167
|
|
|
'onInput', |
168
|
|
|
'onInvalid', |
169
|
|
|
'onKeyDown', |
170
|
|
|
'onKeyPress', |
171
|
|
|
'onKeyUp', |
172
|
|
|
'onLanguageChange', |
173
|
|
|
'onLayoutComplete', |
174
|
|
|
'onLoad', |
175
|
|
|
'onLoadedData', |
176
|
|
|
'onLoadedMetaData', |
177
|
|
|
'onLoadStart', |
178
|
|
|
'onLoseCapture', |
179
|
|
|
'onLostPointerCapture', |
180
|
|
|
'onMediaComplete', |
181
|
|
|
'onMediaError', |
182
|
|
|
'onMessage', |
183
|
|
|
'onMouseDown', |
184
|
|
|
'onMouseEnter', |
185
|
|
|
'onMouseLeave', |
186
|
|
|
'onMouseMove', |
187
|
|
|
'onMouseOut', |
188
|
|
|
'onMouseOver', |
189
|
|
|
'onMouseUp', |
190
|
|
|
'onMouseWheel', |
191
|
|
|
'onMove', |
192
|
|
|
'onMoveEnd', |
193
|
|
|
'onMoveStart', |
194
|
|
|
'onMozFullScreenChange', |
195
|
|
|
'onMozFullScreenError', |
196
|
|
|
'onMozPointerLockChange', |
197
|
|
|
'onMozPointerLockError', |
198
|
|
|
'onMsContentZoom', |
199
|
|
|
'onMsFullScreenChange', |
200
|
|
|
'onMsFullScreenError', |
201
|
|
|
'onMsGestureChange', |
202
|
|
|
'onMsGestureDoubleTap', |
203
|
|
|
'onMsGestureEnd', |
204
|
|
|
'onMsGestureHold', |
205
|
|
|
'onMsGestureStart', |
206
|
|
|
'onMsGestureTap', |
207
|
|
|
'onMsGotPointerCapture', |
208
|
|
|
'onMsInertiaStart', |
209
|
|
|
'onMsLostPointerCapture', |
210
|
|
|
'onMsManipulationStateChanged', |
211
|
|
|
'onMsPointerCancel', |
212
|
|
|
'onMsPointerDown', |
213
|
|
|
'onMsPointerEnter', |
214
|
|
|
'onMsPointerLeave', |
215
|
|
|
'onMsPointerMove', |
216
|
|
|
'onMsPointerOut', |
217
|
|
|
'onMsPointerOver', |
218
|
|
|
'onMsPointerUp', |
219
|
|
|
'onMsSiteModeJumpListItemRemoved', |
220
|
|
|
'onMsThumbnailClick', |
221
|
|
|
'onOffline', |
222
|
|
|
'onOnline', |
223
|
|
|
'onOutOfSync', |
224
|
|
|
'onPage', |
225
|
|
|
'onPageHide', |
226
|
|
|
'onPageShow', |
227
|
|
|
'onPaste', |
228
|
|
|
'onPause', |
229
|
|
|
'onPlay', |
230
|
|
|
'onPlaying', |
231
|
|
|
'onPointerCancel', |
232
|
|
|
'onPointerDown', |
233
|
|
|
'onPointerEnter', |
234
|
|
|
'onPointerLeave', |
235
|
|
|
'onPointerLockChange', |
236
|
|
|
'onPointerLockError', |
237
|
|
|
'onPointerMove', |
238
|
|
|
'onPointerOut', |
239
|
|
|
'onPointerOver', |
240
|
|
|
'onPointerUp', |
241
|
|
|
'onPopState', |
242
|
|
|
'onProgress', |
243
|
|
|
'onPropertyChange', |
244
|
|
|
'onqt_error', |
245
|
|
|
'onRateChange', |
246
|
|
|
'onReadyStateChange', |
247
|
|
|
'onReceived', |
248
|
|
|
'onRepeat', |
249
|
|
|
'onReset', |
250
|
|
|
'onResize', |
251
|
|
|
'onResizeEnd', |
252
|
|
|
'onResizeStart', |
253
|
|
|
'onResume', |
254
|
|
|
'onReverse', |
255
|
|
|
'onRowDelete', |
256
|
|
|
'onRowEnter', |
257
|
|
|
'onRowExit', |
258
|
|
|
'onRowInserted', |
259
|
|
|
'onRowsDelete', |
260
|
|
|
'onRowsEnter', |
261
|
|
|
'onRowsExit', |
262
|
|
|
'onRowsInserted', |
263
|
|
|
'onScroll', |
264
|
|
|
'onSearch', |
265
|
|
|
'onSeek', |
266
|
|
|
'onSeeked', |
267
|
|
|
'onSeeking', |
268
|
|
|
'onSelect', |
269
|
|
|
'onSelectionChange', |
270
|
|
|
'onSelectStart', |
271
|
|
|
'onStalled', |
272
|
|
|
'onStorage', |
273
|
|
|
'onStorageCommit', |
274
|
|
|
'onStart', |
275
|
|
|
'onStop', |
276
|
|
|
'onShow', |
277
|
|
|
'onSyncRestored', |
278
|
|
|
'onSubmit', |
279
|
|
|
'onSuspend', |
280
|
|
|
'onSynchRestored', |
281
|
|
|
'onTimeError', |
282
|
|
|
'onTimeUpdate', |
283
|
|
|
'onTimer', |
284
|
|
|
'onTrackChange', |
285
|
|
|
'onTransitionEnd', |
286
|
|
|
'onToggle', |
287
|
|
|
'onTouchCancel', |
288
|
|
|
'onTouchEnd', |
289
|
|
|
'onTouchLeave', |
290
|
|
|
'onTouchMove', |
291
|
|
|
'onTouchStart', |
292
|
|
|
'onTransitionCancel', |
293
|
|
|
'onTransitionEnd', |
294
|
|
|
'onUnload', |
295
|
|
|
'onURLFlip', |
296
|
|
|
'onUserProximity', |
297
|
|
|
'onVolumeChange', |
298
|
|
|
'onWaiting', |
299
|
|
|
'onWebKitAnimationEnd', |
300
|
|
|
'onWebKitAnimationIteration', |
301
|
|
|
'onWebKitAnimationStart', |
302
|
|
|
'onWebKitFullScreenChange', |
303
|
|
|
'onWebKitFullScreenError', |
304
|
|
|
'onWebKitTransitionEnd', |
305
|
|
|
'onWheel', |
306
|
|
|
]; |
307
|
|
|
|
308
|
|
|
/** |
309
|
|
|
* https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Event_Handlers |
310
|
|
|
* |
311
|
|
|
* @var string[] |
312
|
|
|
*/ |
313
|
|
|
private $_evil_attributes_regex = [ |
314
|
|
|
'style', |
315
|
|
|
'xmlns:xdp', |
316
|
|
|
'formaction', |
317
|
|
|
'form', |
318
|
|
|
'xlink:href', |
319
|
|
|
'seekSegmentTime', |
320
|
|
|
'FSCommand', |
321
|
|
|
]; |
322
|
|
|
|
323
|
|
|
/** |
324
|
|
|
* @var string[] |
325
|
|
|
*/ |
326
|
|
|
private $_evil_html_tags = [ |
327
|
|
|
'applet', |
328
|
|
|
'audio', |
329
|
|
|
'basefont', |
330
|
|
|
'base', |
331
|
|
|
'behavior', |
332
|
|
|
'bgsound', |
333
|
|
|
'blink', |
334
|
|
|
'body', |
335
|
|
|
'embed', |
336
|
|
|
'eval', |
337
|
|
|
'expression', |
338
|
|
|
'form', |
339
|
|
|
'frameset', |
340
|
|
|
'frame', |
341
|
|
|
'head', |
342
|
|
|
'html', |
343
|
|
|
'ilayer', |
344
|
|
|
'iframe', |
345
|
|
|
'input', |
346
|
|
|
'button', |
347
|
|
|
'select', |
348
|
|
|
'isindex', |
349
|
|
|
'layer', |
350
|
|
|
'link', |
351
|
|
|
'meta', |
352
|
|
|
'keygen', |
353
|
|
|
'object', |
354
|
|
|
'plaintext', |
355
|
|
|
'style', |
356
|
|
|
'script', |
357
|
|
|
'textarea', |
358
|
|
|
'title', |
359
|
|
|
'math', |
360
|
|
|
'noscript', |
361
|
|
|
'event-source', |
362
|
|
|
'vmlframe', |
363
|
|
|
'video', |
364
|
|
|
'source', |
365
|
|
|
'svg', |
366
|
|
|
'xml', |
367
|
|
|
]; |
368
|
|
|
|
369
|
|
|
/** |
370
|
|
|
* @var string |
371
|
|
|
*/ |
372
|
|
|
private $_spacing_regex = '(?:\s|"|\'|\+|�[9A-F];|%0[9a-f])*?'; |
373
|
|
|
|
374
|
|
|
/** |
375
|
|
|
* The replacement-string for not allowed strings. |
376
|
|
|
* |
377
|
|
|
* @var string |
378
|
|
|
*/ |
379
|
|
|
private $_replacement = ''; |
380
|
|
|
|
381
|
|
|
/** |
382
|
|
|
* List of never allowed strings. |
383
|
|
|
* |
384
|
|
|
* @var string[] |
385
|
|
|
*/ |
386
|
|
|
private $_never_allowed_str = []; |
387
|
|
|
|
388
|
|
|
/** |
389
|
|
|
* If your DB (MySQL) encoding is "utf8" and not "utf8mb4", then |
390
|
|
|
* you can't save 4-Bytes chars from UTF-8 and someone can create stored XSS-attacks. |
391
|
|
|
* |
392
|
|
|
* @var bool |
393
|
|
|
*/ |
394
|
|
|
private $_stripe_4byte_chars = false; |
395
|
|
|
|
396
|
|
|
/** |
397
|
|
|
* @var bool|null |
398
|
|
|
*/ |
399
|
|
|
private $_xss_found; |
400
|
|
|
|
401
|
|
|
/** |
402
|
|
|
* @var string |
403
|
|
|
*/ |
404
|
|
|
private $_cache_evil_attributes_regex_string = ''; |
405
|
|
|
|
406
|
|
|
/** |
407
|
|
|
* @var string |
408
|
|
|
*/ |
409
|
|
|
private $_cache_never_allowed_regex_string = ''; |
410
|
|
|
|
411
|
|
|
/** |
412
|
|
|
* @var string |
413
|
|
|
*/ |
414
|
|
|
private $_cache__evil_html_tags_str = ''; |
415
|
|
|
|
416
|
|
|
/**
 * Create a new AntiXSS instance and populate its default filter tables.
 *
 * Both tables are built from the current value of "$this->_replacement".
 */
public function __construct()
{
    // map of plain strings that are always replaced
    $this->_initNeverAllowedStr();

    // map of regular expressions that are always replaced
    $this->_initNeverAllowedRegex();
}
424
|
|
|
|
425
|
|
|
/**
 * Compact any exploded words.
 *
 * <p>
 * <br />
 * INFO: This corrects words like: j a v a s c r i p t
 * <br />
 * These words are compacted back to their correct state, so that the
 * filters which run afterwards can recognise them.
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _compact_exploded_javascript(string $str)
{
    // Per-word regex fragments ("chunk") and per-word character lists ("split").
    //
    // The cache is created once and then kept; previously both sub-arrays were
    // reset on every call, so the "isset" check below could never hit.
    //
    // NOTE(review): the cached fragments embed "$this->_spacing_regex"; this
    // assumes that property is never changed after construction - confirm.
    static $WORDS_CACHE;
    if ($WORDS_CACHE === null) {
        $WORDS_CACHE = [
            'chunk' => [],
            'split' => [],
        ];
    }

    $words = [
        'javascript',
        '<script',
        '</script>',
        'base64',
        'document',
        'eval',
    ];

    // For short strings, a cheap per-character pre-check decides whether
    // the regex for a word needs to run at all.
    $useStrPos = \strlen($str) <= 30;

    foreach ($words as $word) {
        if (!isset($WORDS_CACHE['chunk'][$word])) {
            // "chunk_split" appends the spacing pattern after every character,
            // including the last one, so the trailing copy is cut off again.
            $WORDS_CACHE['chunk'][$word] = \substr(
                \chunk_split($word, 1, $this->_spacing_regex),
                0,
                -\strlen($this->_spacing_regex)
            );

            $WORDS_CACHE['split'][$word] = \str_split($word, 1);
        }

        if ($useStrPos) {
            // If any character of the word is missing from the string,
            // the word cannot occur: skip to the next word.
            foreach ($WORDS_CACHE['split'][$word] as $charTmp) {
                if (\stripos($str, $charTmp) === false) {
                    continue 2;
                }
            }
        }

        // We only want to do this when the word is followed by a non-word character,
        // and when no letter precedes it (or it is at the start of the string).
        //
        // That way valid stuff like "dealer to!" does not become "dealerto".

        $str = (string) \preg_replace_callback(
            '#(?<before>[^\p{L}]|^)(?<word>' . \str_replace(
                ['#', '.'],
                ['\#', '\.'],
                $WORDS_CACHE['chunk'][$word]
            ) . ')(?<after>[^\p{L}@.!? ]|$)#ius',
            function ($matches) {
                return $this->_compact_exploded_words_callback($matches);
            },
            $str
        );
    }

    return $str;
}
500
|
|
|
|
501
|
|
|
/**
 * Compact one exploded word.
 *
 * <p>
 * <br />
 * INFO: Callback for "_compact_exploded_javascript()"; it strips everything
 * that matches the spacing pattern out of the matched word, e.g. for
 * 'j a v a s c r i p t', and keeps the text captured before and after it.
 * </p>
 *
 * @param string[] $matches <p>Needs the keys "before", "word" and "after".</p>
 *
 * @return string
 */
private function _compact_exploded_words_callback($matches)
{
    $spacingPattern = '/' . $this->_spacing_regex . '/ius';

    $compactedWord = \preg_replace($spacingPattern, '', $matches['word']);

    return $matches['before'] . $compactedWord . $matches['after'];
}
521
|
|
|
|
522
|
|
|
/**
 * HTML-Entity decode callback.
 *
 * <p>
 * <br />
 * INFO: If the matched text contains quoted "name=value" pairs, every value is
 * cleaned on its own via a cloned instance, and only values in which XSS was
 * found are url-decoded and written back. Otherwise the whole match is
 * url-decoded and entity-decoded. In both cases "<" and ">" are swapped for
 * placeholders while decoding and restored afterwards.
 * </p>
 *
 * @param string[] $match <p>Only index 0 (the full match) is used.</p>
 *
 * @return string
 */
private function _decode_entity(array $match)
{
    // init
    $str = $match[0];

    // protect GET variables without XSS in URLs
    $needProtection = true;
    if (\strpos($str, '=') !== false) {
        $remaining = $str;
        $pairs = [];

        // NOTE(review): this literal is double-quoted, so "\1" inside the
        // character class is the byte 0x01 and not a back-reference; only the
        // trailing "\\1" refers to the opening quote.
        while (\preg_match("/[?|&]?[\p{L}0-9_\-\[\]]+\s*=\s*([\"'])(?<attr>[^\1]*?)\\1/u", $remaining, $pair)) {
            $pairs[] = $pair;
            $remaining = \str_replace($pair[0], '', $remaining);

            // stop as soon as no further quoted value can be left over
            if (\substr_count($remaining, '"') <= 1 && \substr_count($remaining, '\'') <= 1) {
                break;
            }
        }

        // something was cut out, so at least one quoted pair was found
        if ($remaining !== $str) {
            $needProtection = false;

            foreach ($pairs as $pair) {
                if (!isset($pair['attr'])) {
                    continue;
                }

                // use a clone, so that the XSS-flag of this instance is only touched on purpose
                $tmpAntiXss = clone $this;

                $urlPartClean = $tmpAntiXss->xss_clean($pair['attr']);

                if ($tmpAntiXss->isXssFound() !== true) {
                    continue;
                }

                $this->_xss_found = true;

                $urlPartClean = \str_replace(['<', '>'], [self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], $urlPartClean);
                $urlPartClean = UTF8::rawurldecode($urlPartClean);
                $urlPartClean = \str_replace([self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], ['<', '>'], $urlPartClean);

                $str = \str_ireplace($pair['attr'], $urlPartClean, $str);
            }
        }
    }

    if ($needProtection) {
        $str = \str_replace(['<', '>'], [self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], $str);
        $str = $this->_entity_decode(UTF8::rawurldecode($str));
        $str = \str_replace([self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], ['<', '>'], $str);
    }

    return $str;
}
580
|
|
|
|
581
|
|
|
/**
 * Decode the string: either via "_decode_entity()" (if something that looks
 * like an html-tag was found) or via "UTF8::rawurldecode()".
 *
 * @param string $str
 *
 * @return string
 */
private function _decode_string(string $str)
{
    // matches from the first "<" that is followed by a letter up to the end of the string
    $regExForHtmlTags = '/<\p{L}+.*+/us';

    $containsTag = \strpos($str, '<') !== false
                   &&
                   \preg_match($regExForHtmlTags, $str) === 1;

    if (!$containsTag) {
        return UTF8::rawurldecode($str);
    }

    return (string) \preg_replace_callback(
        $regExForHtmlTags,
        function ($tagMatch) {
            return $this->_decode_entity($tagMatch);
        },
        $str
    );
}
611
|
|
|
|
612
|
|
|
/**
 * Run the complete clean-up pipeline for one value.
 *
 * <p>
 * <br />
 * INFO: Empty values and values that are plain integers / floats are returned
 * as string without any processing. "$this->_xss_found" is updated, but it is
 * never reset from "true" back to "false" by this method.
 * </p>
 *
 * @param string $str <p>The value is cast to string first.</p>
 *
 * @return string
 */
private function _do($str)
{
    $str = (string) $str;

    // nothing to clean: empty / "0" / integer-like / float-like
    $isHarmless = !$str
                  ||
                  (string) (int) $str === $str
                  ||
                  (string) (float) $str === $str;

    if ($isHarmless) {
        // no xss found
        if ($this->_xss_found !== true) {
            $this->_xss_found = false;
        }

        return $str;
    }

    // remove the BOM from UTF-8 / UTF-16 / UTF-32 strings
    $str = UTF8::remove_bom($str);

    // replace the diamond question mark and invalid-UTF8 chars
    $str = UTF8::replace_diamond_question_mark($str, '');

    // replace invisible characters with one single space
    $str = UTF8::remove_invisible_characters($str, true, ' ');

    // normalize the whitespace
    $str = UTF8::normalize_whitespace($str);

    // decode UTF-7 characters
    $str = $this->_repack_utf7($str);

    // decode the string
    $str = $this->_decode_string($str);

    // remove all >= 4-Byte chars if needed
    if ($this->_stripe_4byte_chars) {
        $str = (string) \preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $str);
    }

    // snapshot of the decoded string (for the comparison at the end)
    $decodedSnapshot = $str;

    // correct words before the browser will do it
    $str = $this->_compact_exploded_javascript($str);

    // remove disallowed javascript calls in links, images etc.
    $str = $this->_remove_disallowed_javascript($str);

    // remove strings that are never allowed
    $str = $this->_do_never_allowed($str);

    // remove evil attributes such as style, onclick and xmlns
    $str = $this->_remove_evil_attributes($str);

    // sanitize naughty JavaScript elements
    $str = $this->_sanitize_naughty_javascript($str);

    // sanitize naughty HTML elements
    $str = $this->_sanitize_naughty_html($str);

    // final clean up
    //
    // -> This adds a bit of extra precaution in case something got through the above filters.
    $str = $this->_do_never_allowed_afterwards($str);

    // check for xss: any change made by the filters above counts as a finding
    if ($this->_xss_found !== true) {
        $this->_xss_found = $decodedSnapshot !== $str;
    }

    return $str;
}
694
|
|
|
|
695
|
|
|
/**
 * Remove never allowed strings.
 *
 * <p>
 * <br />
 * INFO: Three passes are applied in this order: the plain-string map
 * ("$this->_never_allowed_str"), the never allowed call prefixes
 * (e.g. "javascript:") and the regex map ("$this->_never_allowed_regex").
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _do_never_allowed(string $str)
{
    // The search keys are read straight from the map. The former static
    // "cache" was set to null right before its own null-check, so it was
    // rebuilt on every call anyway.
    $str = \str_ireplace(
        \array_keys($this->_never_allowed_str),
        $this->_never_allowed_str,
        $str
    );

    // ---

    // only put those call names into the regex that occur in the string at all
    $replaceNeverAllowedCall = [];
    foreach (self::$_never_allowed_call as $call) {
        if (\stripos($str, $call) !== false) {
            $replaceNeverAllowedCall[] = $call;
        }
    }

    if ($replaceNeverAllowedCall !== []) {
        $str = (string) \preg_replace(
            '#([^\p{L}]|^)(?:' . \implode('|', $replaceNeverAllowedCall) . ')\s*:(?:.*?([/\\\;()\'">]|$))#ius',
            '$1' . $this->_replacement . '$2',
            $str
        );
    }

    // ---

    // Patterns that use the default replacement are collected and run as one
    // combined alternation below; all other patterns are run one by one
    // (with the additional ungreedy "U" modifier).
    $regex_combined = [];
    foreach ($this->_never_allowed_regex as $regex => $replacement) {
        if ($replacement === $this->_replacement) {
            $regex_combined[] = $regex;

            continue;
        }

        $str = (string) \preg_replace(
            '#' . $regex . '#iUus',
            $replacement,
            $str
        );
    }

    if (!$this->_cache_never_allowed_regex_string || $regex_combined !== []) {
        $this->_cache_never_allowed_regex_string = \implode('|', $regex_combined);
    }

    if ($this->_cache_never_allowed_regex_string) {
        $str = (string) \preg_replace(
            '#' . $this->_cache_never_allowed_regex_string . '#ius',
            $this->_replacement,
            $str
        );
    }

    return $str;
}
765
|
|
|
|
766
|
|
|
/**
 * Remove never allowed strings, afterwards.
 *
 * <p>
 * <br />
 * INFO: This also cleans up some strings if there is no html-tag: every
 * known "on..."-event name is removed (repeatedly, until nothing matches
 * anymore), and finally the strings from
 * "$this->_never_allowed_str_afterwards" are replaced.
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _do_never_allowed_afterwards(string $str)
{
    // cheap pre-check: no "on" means that no event name can be in the string
    if (\stripos($str, 'on') !== false) {
        $replacement = '$1' . $this->_replacement . '$2';

        foreach ($this->_never_allowed_on_events_afterwards as $event) {
            if (\stripos($str, $event) === false) {
                continue;
            }

            // NOTE(review): the "after" group contains two pairs of identical
            // alternatives - presumably variants of "=" that were lost; compare
            // with the upstream source.
            $regex = '(?<before>[^\p{L}]|^)(?:' . $event . ')(?<after>\(.*?\)|.*?>|(?:\s|\[.*?\])*?=(?:\s|\[.*?\])*?|(?:\s|\[.*?\])*?=(?:\s|\[.*?\])*?|[^\p{L}]*?=[^\p{L}]*?|[^\p{L}]*?=[^\p{L}]*?|$|\s*?>*?$)';
            $pattern = '#' . $regex . '#ius';

            // repeat until a pass does not replace anything anymore
            do {
                $replacedInPass = 0;

                $str = (string) \preg_replace(
                    $pattern,
                    $replacement,
                    $str,
                    -1,
                    $replacedInPass
                );
            } while ($replacedInPass);
        }
    }

    return (string) \str_ireplace(
        $this->_never_allowed_str_afterwards,
        $this->_replacement,
        $str
    );
}
807
|
|
|
|
808
|
|
|
/** |
809
|
|
|
* Entity-decoding. |
810
|
|
|
* |
811
|
|
|
* @param string $str |
812
|
|
|
* |
813
|
|
|
* @return string |
814
|
|
|
*/ |
815
|
58 |
|
private function _entity_decode(string $str) |
816
|
|
|
{ |
817
|
58 |
|
static $HTML_ENTITIES_CACHE; |
818
|
|
|
|
819
|
58 |
|
$flags = ENT_QUOTES | ENT_HTML5 | ENT_DISALLOWED | ENT_SUBSTITUTE; |
820
|
|
|
|
821
|
|
|
// decode |
822
|
58 |
|
$str = UTF8::html_entity_decode($str, $flags); |
823
|
|
|
|
824
|
|
|
// decode-again, for e.g. HHVM or miss configured applications ... |
825
|
|
|
if ( |
826
|
58 |
|
\strpos($str, '&') !== false |
827
|
|
|
&& |
828
|
58 |
|
\preg_match_all('/(?<html_entity>&[A-Za-z]{2,}[;]{0})/', $str, $matches) |
829
|
|
|
) { |
830
|
2 |
|
if ($HTML_ENTITIES_CACHE === null) { |
831
|
|
|
|
832
|
|
|
// links: |
833
|
|
|
// - http://dev.w3.org/html5/html-author/charref |
834
|
|
|
// - http://www.w3schools.com/charsets/ref_html_entities_n.asp |
835
|
|
|
$entitiesSecurity = [ |
836
|
1 |
|
'�' => '', |
837
|
|
|
'�' => '', |
838
|
|
|
'' => '', |
839
|
|
|
'' => '', |
840
|
|
|
'>⃒' => '', |
841
|
|
|
'' => '', |
842
|
|
|
'' => '', |
843
|
|
|
'­' => '', |
844
|
|
|
'­' => '', |
845
|
|
|
'­' => '', |
846
|
|
|
':' => ':', |
847
|
|
|
':' => ':', |
848
|
|
|
':' => ':', |
849
|
|
|
'(' => '(', |
850
|
|
|
'(' => '(', |
851
|
|
|
'(' => '(', |
852
|
|
|
')' => ')', |
853
|
|
|
')' => ')', |
854
|
|
|
')' => ')', |
855
|
|
|
'?' => '?', |
856
|
|
|
'?' => '?', |
857
|
|
|
'?' => '?', |
858
|
|
|
'/' => '/', |
859
|
|
|
'/' => '/', |
860
|
|
|
'/' => '/', |
861
|
|
|
''' => '\'', |
862
|
|
|
''' => '\'', |
863
|
|
|
''' => '\'', |
864
|
|
|
''' => '\'', |
865
|
|
|
''' => '\'', |
866
|
|
|
'\' => '\'', |
867
|
|
|
'\' => '\\', |
868
|
|
|
'\' => '\\', |
869
|
|
|
',' => ',', |
870
|
|
|
',' => ',', |
871
|
|
|
',' => ',', |
872
|
|
|
'.' => '.', |
873
|
|
|
'.' => '.', |
874
|
|
|
'"' => '"', |
875
|
|
|
'"' => '"', |
876
|
|
|
'"' => '"', |
877
|
|
|
'"' => '"', |
878
|
|
|
'`' => '`', |
879
|
|
|
'`' => '`', |
880
|
|
|
'`' => '`', |
881
|
|
|
'`' => '`', |
882
|
|
|
'.' => '.', |
883
|
|
|
'=' => '=', |
884
|
|
|
'=' => '=', |
885
|
|
|
'=' => '=', |
886
|
|
|
'&newline;' => "\n", |
887
|
|
|
'
' => "\n", |
888
|
|
|
' ' => "\n", |
889
|
|
|
'&tab;' => "\t", |
890
|
|
|
'	' => "\t", |
891
|
|
|
'	' => "\t", |
892
|
|
|
]; |
893
|
|
|
|
894
|
1 |
|
$HTML_ENTITIES_CACHE = \array_merge( |
895
|
1 |
|
$entitiesSecurity, |
896
|
1 |
|
\array_flip(\get_html_translation_table(HTML_ENTITIES, $flags)), |
897
|
1 |
|
\array_flip(self::_get_data('entities_fallback')) |
898
|
|
|
); |
899
|
|
|
} |
900
|
|
|
|
901
|
2 |
|
$search = []; |
902
|
2 |
|
$replace = []; |
903
|
2 |
|
foreach ($matches['html_entity'] as $match) { |
904
|
2 |
|
$match .= ';'; |
905
|
2 |
|
if (isset($HTML_ENTITIES_CACHE[$match])) { |
906
|
|
|
$search[$match] = $match; |
907
|
2 |
|
$replace[$match] = $HTML_ENTITIES_CACHE[$match]; |
908
|
|
|
} |
909
|
|
|
} |
910
|
|
|
|
911
|
2 |
|
if (\count($replace) > 0) { |
912
|
|
|
$str = \str_ireplace($search, $replace, $str); |
913
|
|
|
} |
914
|
|
|
} |
915
|
|
|
|
916
|
58 |
|
return $str; |
917
|
|
|
} |
918
|
|
|
|
919
|
|
|
/**
 * Filters tag attributes for consistency and safety.
 *
 * <p>
 * <br />
 * INFO: If the string contains a "=", only the quoted name="value" /
 * name='value' pairs are kept (concatenated in the order they were found);
 * everything else is dropped. A string without "=" is returned as it is.
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _filter_attributes(string $str)
{
    if ($str === '') {
        return '';
    }

    // no "=", no attribute-values: nothing to filter
    if (\strpos($str, '=') === false) {
        return $str;
    }

    $keptPairs = [];

    // NOTE(review): "\1" inside the character class is not a back-reference;
    // only the trailing "\\1" refers to the opening quote.
    while (\preg_match('#\s*[\p{L}0-9_\-\[\]]+\s*=\s*(["\'])(?:[^\1]*?)\\1#u', $str, $pair)) {
        $keptPairs[] = $pair[0];
        $str = \str_replace($pair[0], '', $str);

        // stop as soon as no further quoted value can be left over
        if (\substr_count($str, '"') <= 1 && \substr_count($str, '\'') <= 1) {
            break;
        }
    }

    return \implode('', $keptPairs);
}
949
|
|
|
|
950
|
|
|
/**
 * Get data from "/data/*.php" (relative to the directory of this file).
 *
 * @param string $file <p>The file name without the ".php" extension.</p>
 *
 * @return mixed <p>Whatever the included file returns.</p>
 */
private static function _get_data(string $file)
{
    $path = __DIR__ . '/data/' . $file . '.php';

    /** @noinspection PhpIncludeInspection */
    return include $path;
}
962
|
|
|
|
963
|
|
|
/**
 * Initialize "$this->_never_allowed_str".
 *
 * <p>
 * <br />
 * INFO: The keys are the strings to search for (case-insensitive), the values
 * are their replacements. Markup openers / closers are not removed, but
 * neutralized by escaping "<" and ">".
 * </p>
 *
 * @return void
 */
private function _initNeverAllowedStr()
{
    $this->_never_allowed_str = [
        'document.cookie'   => $this->_replacement,
        '(document).cookie' => $this->_replacement,
        'document.write'    => $this->_replacement,
        '(document).write'  => $this->_replacement,
        '.parentNode'       => $this->_replacement,
        '.innerHTML'        => $this->_replacement,
        '.appendChild'      => $this->_replacement,
        '-moz-binding'      => $this->_replacement,
        // The following entries must not map a string onto itself (that would
        // be a no-op): the angle bracket is replaced by its html-entity.
        '<?'                => '&lt;?',
        '?>'                => '?&gt;',
        '<![CDATA['         => '&lt;![CDATA[',
        '<!ENTITY'          => '&lt;!ENTITY',
        '<!DOCTYPE'         => '&lt;!DOCTYPE',
        '<!ATTLIST'         => '&lt;!ATTLIST',
    ];
}
987
|
|
|
|
988
|
|
|
/**
 * Initialize "$this->_never_allowed_regex".
 *
 * <p>
 * <br />
 * INFO: The keys are regex-patterns without delimiters and modifiers, the
 * values are their replacements. Html-comments are not removed, but
 * neutralized by escaping "<" and ">".
 * </p>
 *
 * @return void
 */
private function _initNeverAllowedRegex()
{
    $this->_never_allowed_regex = [
        // default javascript
        '(\(?:?document\)?|\(?:?window\)?(?:\.document)?)\.(?:location|on\w*)' => $this->_replacement,
        // data-attribute + base64
        //
        // NOTE(review): this literal is double-quoted, so every "\1" in it is
        // the byte 0x01 and not a back-reference to the opening quote.
        "([\"'])?data\s*:\s*(?!image\s*\/\s*(?!svg.*?))[^\1]*?base64[^\1]*?,[^\1]*?\1?" => $this->_replacement,
        // old IE, old Netscape
        'expression\s*(?:\(|&\#40;)' => $this->_replacement,
        // src="js"
        'src\=(?<wrapper>[\'|"]).*\.js(?:\g{wrapper})' => $this->_replacement,
        // comments: the delimiters are escaped, an identical replacement would be a no-op
        '<!--(.*)-->' => '&lt;!--$1--&gt;',
        '<!--'        => '&lt;!--',
    ];
}
1009
|
|
|
|
1010
|
|
|
/**
 * Callback method for xss_clean() to sanitize links.
 *
 * <p>
 * <br />
 * INFO: This only forwards the match to "_js_removal_callback()", with the
 * attribute to search for fixed to "href".
 * </p>
 *
 * @param string[] $match
 *
 * @return string
 */
private function _js_link_removal_callback(array $match)
{
    $searchAttribute = 'href';

    return $this->_js_removal_callback($match, $searchAttribute);
}
1028
|
|
|
|
1029
|
|
|
/**
 * Neutralize a harmful "$search"-attribute (e.g. "href" or "src") inside one matched tag.
 *
 * <p>
 * <br />
 * INFO: Working tag by tag limits the PCRE backtracks, which is more performance
 * friendly and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered on
 * tag heavy strings.
 * </p>
 *
 * @param string[] $match  <p>regex match: [0] = the whole tag, [1] = the captured attribute part</p>
 * @param string   $search <p>name of the attribute that should be inspected</p>
 *
 * @return string <p>the tag; the attribute value is swapped for the replacement-string if it looks harmful</p>
 */
private function _js_removal_callback(array $match, string $search)
{
    if (!$match[0]) {
        return '';
    }

    $isLink = ($search === 'href');

    // hack for style attributes v1: hide a double-quoted style-attribute behind a placeholder
    $styleParts = [];
    $styleWasHidden = false;
    if ($isLink && \stripos($match[0], 'style') !== false) {
        \preg_match('/style=".*?"/ius', $match[0], $styleParts);
        $styleWasHidden = ($styleParts !== []);
        if ($styleWasHidden) {
            $match[0] = \str_ireplace($styleParts[0], self::VOKU_ANTI_XSS_STYLE, $match[0]);
        }
    }

    $attributes = $this->_filter_attributes(\str_replace(['<', '>'], '', $match[1]));
    $hasEqualSign = (\strpos($match[1], '=') !== false);

    // filter for "$search"-attributes
    if ($hasEqualSign && \stripos($attributes, $search) !== false) {
        $neutralized = $search . '="' . $this->_replacement . '"';
        $isHarmful = false;

        // step 1: run the quoted attribute through a cloned cleaner and ask it for the verdict
        $quotedPattern = '#' . $search . '=(?<wrapper>[\'|"]).*(?:\g{wrapper})#isU';
        $quotedHit = [];
        \preg_match($quotedPattern, $match[1], $quotedHit);
        if ($quotedHit !== []) {
            $cleaner = clone $this;

            /** @noinspection UnusedFunctionResultInspection */
            $cleaner->xss_clean($quotedHit[0]);

            if ($cleaner->isXssFound() === true) {
                $isHarmful = true;
                $this->_xss_found = true;

                $attributes = (string) \preg_replace($quotedPattern, $neutralized, $attributes);
            }
        }

        // step 2: otherwise search for never allowed calls / well-known objects behind the attribute
        if (!$isHarmful) {
            // only the calls that occur in the tag at all are added to the pattern
            $callAlternatives = '';
            foreach (self::$_never_allowed_call as $call) {
                if (\stripos($match[0], $call) !== false) {
                    $callAlternatives .= $call . ':|';
                }
            }

            $callPattern = '#' . $search . '=.*(?:' . $callAlternatives . '\(?window\)?\.|\(?history\)?\.|\(?location\)?\.|\(?document\)?\.|\(?cookie\)?\.|\(?ScriptElement\)?\.|d\s*a\s*t\s*a\s*:)#ius';
            $callHit = [];
            \preg_match($callPattern, $match[1], $callHit);
            if ($callHit !== []) {
                $attributes = (string) \preg_replace($callPattern, $neutralized, $attributes);
            }
        }
    }

    $result = \str_ireplace($match[1], $attributes, (string) $match[0]);

    // hack for style attributes v2: put the hidden style-attribute back (only ever set for "href")
    if ($styleWasHidden) {
        $result = \str_replace(self::VOKU_ANTI_XSS_STYLE, $styleParts[0], $result);
    }

    return $result;
}
1133
|
|
|
|
1134
|
|
|
/**
 * Sanitize the "src"-attribute of one matched tag.
 *
 * <p>
 * <br />
 * INFO: Handling the tags one by one limits the PCRE backtracks, which is
 * more performance friendly and prevents PREG_BACKTRACK_LIMIT_ERROR from
 * being triggered on image tag heavy strings.
 * </p>
 *
 * @param string[] $match <p>regex match of a single tag</p>
 *
 * @return string
 */
private function _js_src_removal_callback(array $match)
{
    $attributeName = 'src';

    return $this->_js_removal_callback($match, $attributeName);
}
1152
|
|
|
|
1153
|
|
|
/**
 * Remove disallowed Javascript in links, in img / audio / video / source tags
 * and strip script tags.
 *
 * <p>
 * <br />
 * We used to do some version comparisons and use of stripos(),
 * but it is dog slow compared to these simplified non-capturing
 * preg_match(), especially if the pattern exists in the string.
 * </p>
 *
 * <p>
 * <br />
 * Note: It was reported that not only space characters, but all in
 * the following pattern can be parsed as separators between a tag name
 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
 * ... however, UTF8::clean() above already strips the
 * hex-encoded ones, so we'll skip them below.
 * </p>
 *
 * <p>
 * <br />
 * The whole pass is repeated until the string does not change anymore.
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _remove_disallowed_javascript($str)
{
    // shared handler for every tag that is checked via its "src"-attribute
    $srcHandler = function ($matches) {
        return $this->_js_src_removal_callback($matches);
    };

    do {
        $before = $str;

        if (\stripos($str, '<a') !== false) {
            $str = (string) \preg_replace_callback(
                '#<a[^\p{L}@>]+([^>]*?)(?:>|$)#iu',
                function ($matches) {
                    return $this->_js_link_removal_callback($matches);
                },
                $str
            );
        }

        if (\stripos($str, '<img') !== false) {
            $str = (string) \preg_replace_callback(
                '#<img[^\p{L}@]+([^>]*?)(?:\s?/?>|$)#iu',
                function ($matches) {
                    // base64 encoded "data:image/..." sources are passed through untouched
                    $isInlineImage = \strpos($matches[1], 'base64') !== false
                                     &&
                                     \preg_match("/([\"'])?data\s*:\s*(?:image\s*\/.*)[^\1]*base64[^\1]*,[^\1]*\1?/iUus", $matches[1]);

                    if ($isInlineImage) {
                        return $matches[0];
                    }

                    return $this->_js_src_removal_callback($matches);
                },
                $str
            );
        }

        foreach (['audio', 'video', 'source'] as $mediaTag) {
            if (\stripos($str, '<' . $mediaTag) !== false) {
                $str = (string) \preg_replace_callback(
                    '#<' . $mediaTag . '[^\p{L}@]+([^>]*?)(?:\s?/?>|$)#iu',
                    $srcHandler,
                    $str
                );
            }
        }

        // INFO: US-ASCII: ¼ === <
        if (\stripos($str, 'script') !== false) {
            $str = (string) \preg_replace(
                '#(?:%3C|¼|<)\s*script[^\p{L}@]+(?:[^>]*)(?:\s?/?(?:%3E|¾|>)|$)#iu',
                $this->_replacement,
                $str
            );
        }

        // second, more tolerant run (the check is repeated because the string may have changed)
        if (\stripos($str, 'script') !== false) {
            $str = (string) \preg_replace(
                '#(?:%3C|¼|<)[^\p{L}@]*/*[^\p{L}@]*(?:script[^\p{L}@]+).*(?:%3E|¾|>)?#iUus',
                $this->_replacement,
                $str
            );
        }
    } while ($before !== $str);

    return (string) $str;
}
1260
|
|
|
|
1261
|
|
|
/**
 * Remove Evil HTML Attributes (like event handlers and style).
 *
 * It removes the evil attribute and either:
 *
 *  - Everything up until a space. For example, everything between the pipes:
 *
 * <code>
 *   <a |style=document.write('hello');alert('world');| class=link>
 * </code>
 *
 *  - Everything inside the quotes. For example, everything between the pipes:
 *
 * <code>
 *   <a |style="document.write('hello'); alert('world');"| class="link">
 * </code>
 *
 * @param string $str <p>The string to check.</p>
 *
 * @return string the string with the evil attributes removed
 */
private function _remove_evil_attributes($str)
{
    // replace quoted style-attributes, first (if needed)
    $styleIsEvil = \stripos($str, 'style') !== false
                   &&
                   \in_array('style', $this->_evil_attributes_regex, true);

    if ($styleIsEvil) {
        do {
            $replaced = 0;

            $str = (string) \preg_replace(
                '/(<[^>]+)(?<!\p{L})(style\s*=\s*"(?:[^"]*?)"|style\s*=\s*\'(?:[^\']*?)\')/iu',
                '$1' . $this->_replacement,
                $str,
                -1,
                $replaced
            );
        } while ($replaced);
    }

    // build the attribute alternation once and keep it until one of the lists changes
    //
    // NOTE(review): implode() puts '\w*|' only between the entries, so the last
    //               on-event entry is not followed by '\w*'.
    if (!$this->_cache_evil_attributes_regex_string) {
        $this->_cache_evil_attributes_regex_string = \implode('|', $this->_evil_attributes_regex)
                                                     . '|'
                                                     . \implode('\w*|', $this->_never_allowed_on_events_afterwards);
    }
    $attributeAlternatives = $this->_cache_evil_attributes_regex_string;

    $quotedPattern = '/(.*)((?:<[^>]+)(?<!\p{L}))(?:' . $attributeAlternatives . ')(?:\s*=\s*)(?:\'(?:.*?)\'|"(?:.*?)")(.*)/ius';
    $unquotedPattern = '/(.*?)(<[^>]+)(?<!\p{L})(?:' . $attributeAlternatives . ')\s*=\s*(?:[^\s>]*)(.*?)/ius';

    // repeat until neither of the two patterns matches anymore
    do {
        $replacedQuoted = 0;
        $replacedUnquoted = 0;

        // illegal attributes with a single or double quoted value
        $str = (string) \preg_replace(
            $quotedPattern,
            '$1$2' . $this->_replacement . '$3$4',
            $str,
            -1,
            $replacedQuoted
        );

        // illegal attributes with an unquoted value
        $str = (string) \preg_replace(
            $unquotedPattern,
            '$1$2' . $this->_replacement . '$3',
            $str,
            -1,
            $replacedUnquoted
        );
    } while ($replacedQuoted + $replacedUnquoted);

    return (string) $str;
}
1334
|
|
|
|
1335
|
|
|
/**
 * UTF-7 decoding function.
 *
 * <p>
 * <br />
 * Every "+...-" sequence is handed over to "_repack_utf7_callback()";
 * strings without a "-" are returned untouched.
 * </p>
 *
 * @param string $str <p>HTML document for recode ASCII part of UTF-7 back to ASCII.</p>
 *
 * @return string
 */
private function _repack_utf7(string $str)
{
    // a UTF-7 sequence always ends with "-", so there is nothing to do without one
    if (\strpos($str, '-') !== false) {
        $str = (string) \preg_replace_callback(
            '#\+([\p{L}0-9]+)-#iu',
            function ($matches) {
                return $this->_repack_utf7_callback($matches);
            },
            $str
        );
    }

    return $str;
}
1356
|
|
|
|
1357
|
|
|
/**
 * Additional UTF-7 decoding function.
 *
 * <p>
 * <br />
 * The captured part is only decoded if it is valid base64 that survives
 * a decode / encode round-trip, otherwise the whole match is returned as it is.
 * </p>
 *
 * @param string[] $strings <p>Array of strings for recode ASCII part of UTF-7 back to ASCII.</p>
 *
 * @return string
 */
private function _repack_utf7_callback(array $strings)
{
    $decoded = \base64_decode($strings[1], true);

    // not base64 at all, or not a clean round-trip (padding is ignored): keep the input
    if (
        $decoded === false
        ||
        \rtrim(\base64_encode($decoded), '=') !== \rtrim($strings[1], '=')
    ) {
        return $strings[0];
    }

    $repacked = (string) \preg_replace_callback(
        '/^((?:\x00.)*?)((?:[^\x00].)+)/us',
        function ($matches) {
            return $this->_repack_utf7_callback_back($matches);
        },
        $decoded
    );

    // drop the NUL-byte in front of each remaining character
    return (string) \preg_replace('/\x00(.)/us', '$1', $repacked);
}
1390
|
|
|
|
1391
|
|
|
/**
 * Additional UTF-7 encoding function.
 *
 * <p>
 * <br />
 * Keeps the first captured group and wraps the second one, base64-encoded
 * without padding, into "+...-" again.
 * </p>
 *
 * @param string[] $str <p>regex match: [1] = part that is kept, [2] = part that is encoded again</p>
 *
 * @return string
 */
private function _repack_utf7_callback_back($str)
{
    $encoded = \rtrim(\base64_encode($str[2]), '=');

    return $str[1] . '+' . $encoded . '-';
}
1402
|
|
|
|
1403
|
|
|
/**
 * Sanitize naughty HTML elements.
 *
 * <p>
 * <br />
 *
 * If a tag containing any of the words in the "_evil_html_tags"-list
 * is found, the tag gets converted to entities.
 *
 * <br /><br />
 *
 * So this: <blink>
 * <br />
 * Becomes: &lt;blink&gt;
 * </p>
 *
 * <p>
 * <br />
 * Afterwards every remaining tag (comments excluded) is passed to
 * "_close_html_callback()", unless its name is listed in "_do_not_close_html_tags".
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _sanitize_naughty_html($str)
{
    // result of the previous round, used to detect that nothing changes anymore
    $previousResult = '';

    do {
        $before = $str;

        // no brackets at all: nothing to sanitize
        if (
            \strpos($str, '<') === false
            &&
            \strpos($str, '>') === false
        ) {
            return $str;
        }

        if (!$this->_cache__evil_html_tags_str) {
            $this->_cache__evil_html_tags_str = \implode('|', $this->_evil_html_tags);
        }

        // step 1: evil tags
        $str = (string) \preg_replace_callback(
            '#<(?<start>/*\s*)(?<tagName>' . $this->_cache__evil_html_tags_str . ')(?<end>[^><]*)(?<rest>[><]*)#ius',
            function ($matches) {
                return $this->_sanitize_naughty_html_callback($matches);
            },
            $str
        );

        if (\strpos($str, '<') === false) {
            return $str;
        }

        // only a lonely "<" is left over from an input that was already flagged
        if (
            $this->_xss_found
            &&
            \trim($str) === '<'
        ) {
            return '';
        }

        // step 2: all other tags
        $str = (string) \preg_replace_callback(
            '#<(?!!--|!\[)((?<start>/*\s*)((?<tagName>[\p{L}:]+)(?=[^\p{L}]|$|)|.+)[^\s"\'\p{L}>/=]*[^>]*)(?<closeTag>>)?#iusS', // tags without comments
            function ($matches) {
                $keepUntouched = $this->_do_not_close_html_tags !== []
                                 &&
                                 isset($matches['tagName'])
                                 &&
                                 \in_array($matches['tagName'], $this->_do_not_close_html_tags, true);

                return $keepUntouched ? $matches[0] : $this->_close_html_callback($matches);
            },
            $str
        );

        if ($str === $previousResult) {
            return (string) $str;
        }

        $previousResult = $str;
    } while ($before !== $str);

    return (string) $str;
}
1490
|
|
|
|
1491
|
|
|
/**
 * Rebuild one matched tag so that no stray angle bracket survives inside of it.
 *
 * <p>
 * <br />
 * Angle brackets inside the tag content are always converted to entities.
 * The leading "<" is kept for closed tags and for number / currency-only
 * content (e.g. "< $2.20"); for every other unclosed tag it is converted
 * to an entity as well.
 * </p>
 *
 * @param string[] $matches <p>regex match: [1] = content behind "<", ['closeTag'] = ">" if the tag was closed</p>
 *
 * @return string
 */
private function _close_html_callback(array $matches)
{
    // replacing the brackets with themselves would be a no-op, so entities are used here
    $encodedContent = \str_replace(['>', '<'], ['&gt;', '&lt;'], $matches[1]);

    if (!empty($matches['closeTag'])) {
        return '<' . $encodedContent . '>';
    }

    // allow e.g. "< $2.20"
    if (\preg_match('/^[ .,\d=%€$₢₣£₤₶ℳ₥₦₧₨රුரூ௹रू₹૱₩₪₸₫֏₭₺₼₮₯₰₷₱﷼₲₾₳₴₽₵₡¢¥円৳元៛₠¤฿؋]*$/u', $matches[1])) {
        return '<' . $encodedContent;
    }

    // unclosed tag: encode the opening bracket, too
    return '&lt;' . $encodedContent;
}
1509
|
|
|
|
1510
|
|
|
/**
 * Sanitize naughty HTML.
 *
 * <p>
 * <br />
 * Callback method for AntiXSS->_sanitize_naughty_html() to neutralize naughty HTML elements:
 * the opening bracket and a captured closing bracket are converted to entities.
 * </p>
 *
 * @param string[] $matches <p>regex match with the named groups "start", "tagName", "end" and "rest"</p>
 *
 * @return string
 */
private function _sanitize_naughty_html_callback(array $matches)
{
    $fullMatch = $matches[0];

    // skip some edge-cases: either the match does not look like a real tag at all ...
    /** @noinspection NotOptimalIfConditionsInspection */
    if (
        (
            \strpos($fullMatch, '=') === false
            &&
            \strpos($fullMatch, ' ') === false
            &&
            \strpos($fullMatch, ':') === false
            &&
            \strpos($fullMatch, '/') === false
            &&
            \strpos($fullMatch, '\\') === false
            &&
            \stripos($fullMatch, '<' . $matches['tagName'] . '>') !== 0
            &&
            \stripos($fullMatch, '</' . $matches['tagName'] . '>') !== 0
            &&
            \stripos($fullMatch, '<' . $matches['tagName'] . '<') !== 0
        )
        ||
        // ... or the evil tag name is only the prefix of a longer tag name
        \preg_match('/<[\/]?' . $matches['tagName'] . '\p{L}+>/ius', $fullMatch) === 1
    ) {
        return $fullMatch;
    }

    return '&lt;' . $matches['start'] . $matches['tagName'] . $matches['end'] // encode opening brace
           // encode captured opening or closing brace to prevent recursive vectors
           . \str_replace(
               [
                   '>',
               ],
               [
                   '&gt;',
               ],
               $matches['rest']
           );
}
1564
|
|
|
|
1565
|
|
|
/**
 * Sanitize naughty scripting elements
 *
 * <p>
 * <br />
 *
 * Similar to "_sanitize_naughty_html()", only instead of looking for
 * tags it looks for PHP and JavaScript commands
 * that are disallowed. Rather than removing the
 * code, it simply converts the parenthesis to entities
 * rendering the code un-executable.
 *
 * <br /><br />
 *
 * For example: <pre>eval('some code')</pre>
 * <br />
 * Becomes: <pre>eval&#40;'some code'&#41;</pre>
 * </p>
 *
 * @param string $str
 *
 * @return string
 */
private function _sanitize_naughty_javascript($str)
{
    // without an opening parenthesis there is no call that could be defused
    if (\strpos($str, '(') === false) {
        return (string) $str;
    }

    $patterns = [
        'alert',
        'prompt',
        'confirm',
        'cmd',
        'passthru',
        'eval',
        'exec',
        'execScript',
        'setTimeout',
        'setInterval',
        'setImmediate',
        'expression',
        'system',
        'fopen',
        'fsockopen',
        'file',
        'file_get_contents',
        'readfile',
        'unlink',
    ];

    foreach ($patterns as $pattern) {
        // cheap pre-check; case-insensitive, like the regex below
        if (\stripos($str, $pattern) !== false) {
            return (string) \preg_replace(
                '#(' . \implode('|', $patterns) . ')(\s*)\((.*)\)#uisU',
                // the parenthesis are written as entities, otherwise the call would stay executable
                '\\1\\2&#40;\\3&#41;',
                $str
            );
        }
    }

    return (string) $str;
}
1633
|
|
|
|
1634
|
|
|
/**
 * Add some strings to the "_evil_attributes_regex"-array.
 *
 * <p>
 * <br />
 * The new strings are put in front of the existing ones.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function addEvilAttributes(array $strings): self
{
    if ($strings !== []) {
        // reset, the cached regex-string is built again on the next use
        $this->_cache_evil_attributes_regex_string = '';

        $merged = \array_merge($strings, $this->_evil_attributes_regex);
        $this->_evil_attributes_regex = $merged;
    }

    return $this;
}
1657
|
|
|
|
1658
|
|
|
/**
 * Add some strings to the "_evil_html_tags"-array.
 *
 * <p>
 * <br />
 * The new strings are put in front of the existing ones.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function addEvilHtmlTags(array $strings): self
{
    if ($strings !== []) {
        // reset, the cached tag-string is built again on the next use
        $this->_cache__evil_html_tags_str = '';

        $merged = \array_merge($strings, $this->_evil_html_tags);
        $this->_evil_html_tags = $merged;
    }

    return $this;
}
1681
|
|
|
|
1682
|
|
|
/**
 * Add some strings to the "_never_allowed_regex"-array.
 *
 * <p>
 * <br />
 * The new strings are put in front of the existing ones.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function addNeverAllowedRegex(array $strings): self
{
    if ($strings !== []) {
        // reset the cached regex-string
        $this->_cache_never_allowed_regex_string = '';

        $merged = \array_merge($strings, $this->_never_allowed_regex);
        $this->_never_allowed_regex = $merged;
    }

    return $this;
}
1705
|
|
|
|
1706
|
|
|
/**
 * Remove some strings from the "_never_allowed_regex"-array.
 *
 * <p>
 * <br />
 * WARNING: Use this method only if you have a really good reason.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function removeNeverAllowedRegex(array $strings): self
{
    if ($strings !== []) {
        // reset the cached regex-string
        $this->_cache_never_allowed_regex_string = '';

        // only values that are really part of the list are taken away
        $toRemove = \array_intersect($strings, $this->_never_allowed_regex);
        $this->_never_allowed_regex = \array_diff($this->_never_allowed_regex, $toRemove);
    }

    return $this;
}
1734
|
|
|
|
1735
|
|
|
/**
 * Add some strings to the "_never_allowed_on_events_afterwards"-array.
 *
 * <p>
 * <br />
 * The new strings are put in front of the existing ones.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function addNeverAllowedOnEventsAfterwards(array $strings): self
{
    if ($strings !== []) {
        // reset, the on-events are part of the cached evil-attributes regex-string
        $this->_cache_evil_attributes_regex_string = '';

        $merged = \array_merge($strings, $this->_never_allowed_on_events_afterwards);
        $this->_never_allowed_on_events_afterwards = $merged;
    }

    return $this;
}
1758
|
|
|
|
1759
|
|
|
/**
 * Add some strings to the "_never_allowed_str_afterwards"-array.
 *
 * <p>
 * <br />
 * The new strings are put in front of the existing ones.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function addNeverAllowedStrAfterwards(array $strings): self
{
    if ($strings !== []) {
        $merged = \array_merge($strings, $this->_never_allowed_str_afterwards);
        $this->_never_allowed_str_afterwards = $merged;
    }

    return $this;
}
1779
|
|
|
|
1780
|
|
|
/**
 * Add some strings to the "_do_not_close_html_tags"-array.
 *
 * <p>
 * <br />
 * The new strings are put in front of the existing ones.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function addDoNotCloseHtmlTags(array $strings): self
{
    if ($strings !== []) {
        $merged = \array_merge($strings, $this->_do_not_close_html_tags);
        $this->_do_not_close_html_tags = $merged;
    }

    return $this;
}
1800
|
|
|
|
1801
|
|
|
/**
 * Remove some strings from the "_do_not_close_html_tags"-array.
 *
 * <p>
 * <br />
 * WARNING: Use this method only if you have a really good reason.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function removeDoNotCloseHtmlTags(array $strings): self
{
    if ($strings !== []) {
        // only values that are really part of the list are taken away
        $toRemove = \array_intersect($strings, $this->_do_not_close_html_tags);
        $this->_do_not_close_html_tags = \array_diff($this->_do_not_close_html_tags, $toRemove);
    }

    return $this;
}
1826
|
|
|
|
1827
|
|
|
/**
 * Check if the "AntiXSS->xss_clean()"-method found an XSS attack in the last run.
 *
 * @return bool|null will return null if the "xss_clean()" wasn't running at all
 */
public function isXssFound()
{
    $lastResult = $this->_xss_found;

    return $lastResult;
}
1836
|
|
|
|
1837
|
|
|
/**
 * Remove some strings from the "_evil_attributes_regex"-array.
 *
 * <p>
 * <br />
 * WARNING: Use this method only if you have a really good reason.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function removeEvilAttributes(array $strings): self
{
    if ($strings !== []) {
        // reset, the cached regex-string is built again on the next use
        $this->_cache_evil_attributes_regex_string = '';

        // only values that are really part of the list are taken away
        $toRemove = \array_intersect($strings, $this->_evil_attributes_regex);
        $this->_evil_attributes_regex = \array_diff($this->_evil_attributes_regex, $toRemove);
    }

    return $this;
}
1865
|
|
|
|
1866
|
|
|
/**
 * Remove some strings from the "_evil_html_tags"-array.
 *
 * <p>
 * <br />
 * WARNING: Use this method only if you have a really good reason.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function removeEvilHtmlTags(array $strings): self
{
    if ($strings !== []) {
        // reset, the cached tag-string is built again on the next use
        $this->_cache__evil_html_tags_str = '';

        // only values that are really part of the list are taken away
        $toRemove = \array_intersect($strings, $this->_evil_html_tags);
        $this->_evil_html_tags = \array_diff($this->_evil_html_tags, $toRemove);
    }

    return $this;
}
1894
|
|
|
|
1895
|
|
|
/**
 * Remove some strings from the "_never_allowed_on_events_afterwards"-array.
 *
 * <p>
 * <br />
 * WARNING: Use this method only if you have a really good reason.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function removeNeverAllowedOnEventsAfterwards(array $strings): self
{
    if ($strings !== []) {
        // reset, the on-events are part of the cached evil-attributes regex-string
        $this->_cache_evil_attributes_regex_string = '';

        // only values that are really part of the list are taken away
        $toRemove = \array_intersect($strings, $this->_never_allowed_on_events_afterwards);
        $this->_never_allowed_on_events_afterwards = \array_diff($this->_never_allowed_on_events_afterwards, $toRemove);
    }

    return $this;
}
1923
|
|
|
|
1924
|
|
|
/**
 * Remove some strings from the "_never_allowed_str_afterwards"-array.
 *
 * <p>
 * <br />
 * WARNING: Use this method only if you have a really good reason.
 * </p>
 *
 * @param string[] $strings
 *
 * @return $this
 */
public function removeNeverAllowedStrAfterwards(array $strings): self
{
    if ($strings !== []) {
        // only values that are really part of the list are taken away
        $toRemove = \array_intersect($strings, $this->_never_allowed_str_afterwards);
        $this->_never_allowed_str_afterwards = \array_diff($this->_never_allowed_str_afterwards, $toRemove);
    }

    return $this;
}
1949
|
|
|
|
1950
|
|
|
/**
 * Set the replacement-string for not allowed strings.
 *
 * <p>
 * <br />
 * The "never allowed" string- and regex-lists are initialized again afterwards,
 * so that they use the new replacement-string.
 * </p>
 *
 * @param string $string
 *
 * @return $this
 */
public function setReplacement($string): self
{
    $replacement = (string) $string;
    $this->_replacement = $replacement;

    // rebuild the lists that embed the replacement-string
    $this->_initNeverAllowedStr();
    $this->_initNeverAllowedRegex();

    return $this;
}
1966
|
|
|
|
1967
|
|
|
/** |
1968
|
|
|
* Set the option to stripe 4-Byte chars. |
1969
|
|
|
* |
1970
|
|
|
* <p> |
1971
|
|
|
* <br /> |
1972
|
|
|
* INFO: use it if your DB (MySQL) can't use "utf8mb4" -> preventing stored XSS-attacks |
1973
|
|
|
* </p> |
1974
|
|
|
* |
1975
|
|
|
* @param bool $bool |
1976
|
|
|
* |
1977
|
|
|
* @return $this |
1978
|
|
|
*/ |
1979
|
1 |
|
public function setStripe4byteChars($bool): self |
1980
|
|
|
{ |
1981
|
1 |
|
$this->_stripe_4byte_chars = (bool) $bool; |
1982
|
|
|
|
1983
|
1 |
|
return $this; |
1984
|
|
|
} |
1985
|
|
|
|
1986
|
|
|
/**
 * XSS Clean
 *
 * <p>
 * <br />
 * Sanitizes data so that "Cross Site Scripting" hacks can be
 * prevented. This method does a fair amount of work but
 * it is extremely thorough, designed to prevent even the
 * most obscure XSS attempts. But keep in mind that nothing
 * is ever 100% foolproof...
 * </p>
 *
 * <p>
 * <br />
 * <strong>Note:</strong> Should only be used to deal with data upon submission.
 * It's not something that should be used for general
 * runtime processing.
 * </p>
 *
 * <p>
 * <br />
 * Arrays are processed recursively, element by element, and are returned
 * with the same keys. After an array was processed, the internal
 * "_xss_found"-flag is true if at least one element (at any depth)
 * triggered the detection, not only the last one.
 * </p>
 *
 * @see http://channel.bitflux.ch/wiki/XSS_Prevention
 *      Based in part on some code and ideas from Bitflux.
 * @see http://ha.ckers.org/xss.html
 *      To help develop this script I used this great list of
 *      vulnerabilities along with a few other hacks I've
 *      harvested from examining vulnerabilities in other programs.
 *
 * @param array|mixed $str <p>input data e.g. string or array of strings</p>
 *
 * @return mixed <p>the cleaned data, or the untouched input if no XSS attack was detected</p>
 */
public function xss_clean($str)
{
    // reset
    $this->_xss_found = null;

    // check for an array of strings
    if (\is_array($str)) {
        $xss_found_in_any_element = false;

        foreach ($str as $key => $value) {
            $str[$key] = $this->xss_clean($value);

            // every recursive call resets the flag, so a hit must be remembered
            // here, otherwise only the result of the last element would survive
            if ($this->_xss_found === true) {
                $xss_found_in_any_element = true;
            }
        }

        // restore the hit; without any hit the flag keeps the state that the
        // last processed element left behind (same as before this fix)
        if ($xss_found_in_any_element) {
            $this->_xss_found = true;
        }

        return $str;
    }

    $old_str_backup = $str;

    // process: repeat the cleaning until the result does not change anymore
    do {
        $old_str = $str;
        $str = $this->_do($str);
    } while ($old_str !== $str);

    // keep the old value, if there wasn't any XSS attack
    // NOTE(review): the flag is not set to true anywhere in this method, so it
    // is presumably set inside the "_do()" pipeline - confirm there
    if ($this->_xss_found !== true) {
        $str = $old_str_backup;
    }

    return $str;
}
2045
|
|
|
} |
2046
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.