Completed
Push — master ( 30b958...2979ab )
by Lars
01:18
created

AntiXSS::_sanitize_naughty_html()   C

Complexity

Conditions 12
Paths 9

Size

Total Lines 67

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 32
CRAP Score 12

Importance

Changes 0
Metric Value
dl 0
loc 67
ccs 32
cts 32
cp 1
rs 6.2933
c 0
b 0
f 0
cc 12
nc 9
nop 1
crap 12

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/** @noinspection ReturnTypeCanBeDeclaredInspection */
4
5
declare(strict_types=1);
6
7
namespace voku\helper;
8
9
use const ENT_DISALLOWED;
10
use const ENT_HTML5;
11
use const ENT_QUOTES;
12
use const ENT_SUBSTITUTE;
13
use const HTML_ENTITIES;
14
15
/**
16
 * AntiXSS
17
 *
18
 * ported from "CodeIgniter"
19
 *
20
 * @copyright   Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/)
21
 * @copyright   Copyright (c) 2014 - 2015, British Columbia Institute of Technology (http://bcit.ca/)
22
 * @copyright   Copyright (c) 2015 - 2020, Lars Moelleken (https://moelleken.org/)
23
 * @license     http://opensource.org/licenses/MIT	MIT License
24
 */
25
final class AntiXSS
26
{
27
    const VOKU_ANTI_XSS_GT = 'voku::anti-xss::gt';
28
29
    const VOKU_ANTI_XSS_LT = 'voku::anti-xss::lt';
30
31
    const VOKU_ANTI_XSS_STYLE = 'voku::anti-xss::STYLE';
32
33
    /**
34
     * List of never allowed regex replacements.
35
     *
36
     * @var string[]
37
     */
38
    private $_never_allowed_regex = [];
39
40
    /**
41
     * List of html tags that will not be closed automatically.
42
     *
43
     * @var string[]
44
     */
45
    private $_do_not_close_html_tags = [];
46
47
    /**
48
     * List of never allowed call statements.
49
     *
50
     * @var string[]
51
     */
52
    private static $_never_allowed_call = [
53
        // default javascript
54
        'javascript',
55
        // Java: jar-protocol is an XSS hazard
56
        'jar',
57
        // Mac (will not run the script, but open it in AppleScript Editor)
58
        'applescript',
59
        // IE: https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#VBscript_in_an_image
60
        'vbscript',
61
        'vbs',
62
        // IE, surprise!
63
        'wscript',
64
        // IE
65
        'jscript',
66
        // https://html5sec.org/#behavior
67
        'behavior',
68
        // old Netscape
69
        'mocha',
70
        // old Netscape
71
        'livescript',
72
        // default view source
73
        'view-source',
74
    ];
75
76
    /**
77
     * @var string[]
78
     */
79
    private $_never_allowed_str_afterwards = [
80
        '&lt;script&gt;',
81
        '&lt;/script&gt;',
82
    ];
83
84
    /**
85
     * List of never allowed strings, afterwards.
86
     *
87
     * @var string[]
88
     */
89
    private $_never_allowed_on_events_afterwards = [
90
        'onAbort',
91
        'onActivate',
92
        'onAttribute',
93
        'onAfterPrint',
94
        'onAfterScriptExecute',
95
        'onAfterUpdate',
96
        'onAnimationCancel',
97
        'onAnimationEnd',
98
        'onAnimationIteration',
99
        'onAnimationStart',
100
        'onAriaRequest',
101
        'onAutoComplete',
102
        'onAutoCompleteError',
103
        'onAuxClick',
104
        'onBeforeActivate',
105
        'onBeforeCopy',
106
        'onBeforeCut',
107
        'onBeforeDeactivate',
108
        'onBeforeEditFocus',
109
        'onBeforePaste',
110
        'onBeforePrint',
111
        'onBeforeScriptExecute',
112
        'onBeforeUnload',
113
        'onBeforeUpdate',
114
        'onBegin',
115
        'onBlur',
116
        'onBounce',
117
        'onCancel',
118
        'onCanPlay',
119
        'onCanPlayThrough',
120
        'onCellChange',
121
        'onChange',
122
        'onClick',
123
        'onClose',
124
        'onCommand',
125
        'onCompassNeedsCalibration',
126
        'onContextMenu',
127
        'onControlSelect',
128
        'onCopy',
129
        'onCueChange',
130
        'onCut',
131
        'onDataAvailable',
132
        'onDataSetChanged',
133
        'onDataSetComplete',
134
        'onDblClick',
135
        'onDeactivate',
136
        'onDeviceLight',
137
        'onDeviceMotion',
138
        'onDeviceOrientation',
139
        'onDeviceProximity',
140
        'onDrag',
141
        'onDragDrop',
142
        'onDragEnd',
143
        'onDragEnter',
144
        'onDragLeave',
145
        'onDragOver',
146
        'onDragStart',
147
        'onDrop',
148
        'onDurationChange',
149
        'onEmptied',
150
        'onEnd',
151
        'onEnded',
152
        'onError',
153
        'onErrorUpdate',
154
        'onExit',
155
        'onFilterChange',
156
        'onFinish',
157
        'onFocus',
158
        'onFocusIn',
159
        'onFocusOut',
160
        'onFormChange',
161
        'onFormInput',
162
        'onFullScreenChange',
163
        'onFullScreenError',
164
        'onGotPointerCapture',
165
        'onHashChange',
166
        'onHelp',
167
        'onInput',
168
        'onInvalid',
169
        'onKeyDown',
170
        'onKeyPress',
171
        'onKeyUp',
172
        'onLanguageChange',
173
        'onLayoutComplete',
174
        'onLoad',
175
        'onLoadedData',
176
        'onLoadedMetaData',
177
        'onLoadStart',
178
        'onLoseCapture',
179
        'onLostPointerCapture',
180
        'onMediaComplete',
181
        'onMediaError',
182
        'onMessage',
183
        'onMouseDown',
184
        'onMouseEnter',
185
        'onMouseLeave',
186
        'onMouseMove',
187
        'onMouseOut',
188
        'onMouseOver',
189
        'onMouseUp',
190
        'onMouseWheel',
191
        'onMove',
192
        'onMoveEnd',
193
        'onMoveStart',
194
        'onMozFullScreenChange',
195
        'onMozFullScreenError',
196
        'onMozPointerLockChange',
197
        'onMozPointerLockError',
198
        'onMsContentZoom',
199
        'onMsFullScreenChange',
200
        'onMsFullScreenError',
201
        'onMsGestureChange',
202
        'onMsGestureDoubleTap',
203
        'onMsGestureEnd',
204
        'onMsGestureHold',
205
        'onMsGestureStart',
206
        'onMsGestureTap',
207
        'onMsGotPointerCapture',
208
        'onMsInertiaStart',
209
        'onMsLostPointerCapture',
210
        'onMsManipulationStateChanged',
211
        'onMsPointerCancel',
212
        'onMsPointerDown',
213
        'onMsPointerEnter',
214
        'onMsPointerLeave',
215
        'onMsPointerMove',
216
        'onMsPointerOut',
217
        'onMsPointerOver',
218
        'onMsPointerUp',
219
        'onMsSiteModeJumpListItemRemoved',
220
        'onMsThumbnailClick',
221
        'onOffline',
222
        'onOnline',
223
        'onOutOfSync',
224
        'onPage',
225
        'onPageHide',
226
        'onPageShow',
227
        'onPaste',
228
        'onPause',
229
        'onPlay',
230
        'onPlaying',
231
        'onPointerCancel',
232
        'onPointerDown',
233
        'onPointerEnter',
234
        'onPointerLeave',
235
        'onPointerLockChange',
236
        'onPointerLockError',
237
        'onPointerMove',
238
        'onPointerOut',
239
        'onPointerOver',
240
        'onPointerUp',
241
        'onPopState',
242
        'onProgress',
243
        'onPropertyChange',
244
        'onqt_error',
245
        'onRateChange',
246
        'onReadyStateChange',
247
        'onReceived',
248
        'onRepeat',
249
        'onReset',
250
        'onResize',
251
        'onResizeEnd',
252
        'onResizeStart',
253
        'onResume',
254
        'onReverse',
255
        'onRowDelete',
256
        'onRowEnter',
257
        'onRowExit',
258
        'onRowInserted',
259
        'onRowsDelete',
260
        'onRowsEnter',
261
        'onRowsExit',
262
        'onRowsInserted',
263
        'onScroll',
264
        'onSearch',
265
        'onSeek',
266
        'onSeeked',
267
        'onSeeking',
268
        'onSelect',
269
        'onSelectionChange',
270
        'onSelectStart',
271
        'onStalled',
272
        'onStorage',
273
        'onStorageCommit',
274
        'onStart',
275
        'onStop',
276
        'onShow',
277
        'onSyncRestored',
278
        'onSubmit',
279
        'onSuspend',
280
        'onSynchRestored',
281
        'onTimeError',
282
        'onTimeUpdate',
283
        'onTimer',
284
        'onTrackChange',
285
        'onTransitionEnd',
286
        'onToggle',
287
        'onTouchCancel',
288
        'onTouchEnd',
289
        'onTouchLeave',
290
        'onTouchMove',
291
        'onTouchStart',
292
        'onTransitionCancel',
293
        'onTransitionEnd',
294
        'onUnload',
295
        'onURLFlip',
296
        'onUserProximity',
297
        'onVolumeChange',
298
        'onWaiting',
299
        'onWebKitAnimationEnd',
300
        'onWebKitAnimationIteration',
301
        'onWebKitAnimationStart',
302
        'onWebKitFullScreenChange',
303
        'onWebKitFullScreenError',
304
        'onWebKitTransitionEnd',
305
        'onWheel',
306
    ];
307
308
    /**
309
     * https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Event_Handlers
310
     *
311
     * @var string[]
312
     */
313
    private $_evil_attributes_regex = [
314
        'style',
315
        'xmlns:xdp',
316
        'formaction',
317
        'form',
318
        'xlink:href',
319
        'seekSegmentTime',
320
        'FSCommand',
321
    ];
322
323
    /**
324
     * @var string[]
325
     */
326
    private $_evil_html_tags = [
327
        'applet',
328
        'audio',
329
        'basefont',
330
        'base',
331
        'behavior',
332
        'bgsound',
333
        'blink',
334
        'body',
335
        'embed',
336
        'eval',
337
        'expression',
338
        'form',
339
        'frameset',
340
        'frame',
341
        'head',
342
        'html',
343
        'ilayer',
344
        'iframe',
345
        'input',
346
        'button',
347
        'select',
348
        'isindex',
349
        'layer',
350
        'link',
351
        'meta',
352
        'keygen',
353
        'object',
354
        'plaintext',
355
        'style',
356
        'script',
357
        'textarea',
358
        'title',
359
        'math',
360
        'noscript',
361
        'event-source',
362
        'vmlframe',
363
        'video',
364
        'source',
365
        'svg',
366
        'xml',
367
    ];
368
369
    /**
370
     * @var string
371
     */
372
    private $_spacing_regex = '(?:\s|"|\'|\+|&#x0[9A-F];|%0[9a-f])*?';
373
374
    /**
375
     * The replacement-string for not allowed strings.
376
     *
377
     * @var string
378
     */
379
    private $_replacement = '';
380
381
    /**
382
     * List of never allowed strings.
383
     *
384
     * @var string[]
385
     */
386
    private $_never_allowed_str = [];
387
388
    /**
389
     * If your DB (MySQL) encoding is "utf8" and not "utf8mb4", then
390
     * you can't save 4-Bytes chars from UTF-8 and someone can create stored XSS-attacks.
391
     *
392
     * @var bool
393
     */
394
    private $_stripe_4byte_chars = false;
395
396
    /**
397
     * @var bool|null
398
     */
399
    private $_xss_found;
400
401
    /**
402
     * @var string
403
     */
404
    private $_cache_evil_attributes_regex_string = '';
405
406
    /**
407
     * @var string
408
     */
409
    private $_cache_never_allowed_regex_string = '';
410
411
    /**
412
     * @var string
413
     */
414
    private $_cache__evil_html_tags_str = '';
415
416
    /**
     * Set up a new instance by running the two list initializers
     * (never-allowed strings first, then never-allowed regexes).
     */
    public function __construct()
    {
        $this->_initNeverAllowedStr();
        $this->_initNeverAllowedRegex();
    }
424
425
    /**
     * Compact any exploded words.
     *
     * <p>
     * <br />
     * INFO: This corrects words like:  j a v a s c r i p t
     * <br />
     * These words are compacted back to their correct state.
     * </p>
     *
     * <p>
     * For every watched word a pattern is built that allows the spacing-regex
     * between each of its characters; every match is then handed to
     * "_compact_exploded_words_callback()" which strips the spacing again.
     * </p>
     *
     * @param string $str
     *
     * @return string
     */
    private function _compact_exploded_javascript(string $str)
    {
        $words = [
            'javascript',
            '<script',
            '</script>',
            'base64',
            'document',
            'eval',
        ];

        // short strings: a cheap per-character pre-check can skip the regex entirely
        $useStrPos = \strlen($str) <= 30;

        foreach ($words as $word) {
            if ($useStrPos) {
                // if a single character of the word is missing, the word can't be in the string
                foreach (\str_split($word, 1) as $charTmp) {
                    if (\stripos($str, $charTmp) === false) {
                        continue 2;
                    }
                }
            }

            // put the spacing-regex after every character and cut off the trailing one
            //
            // INFO: the former static cache was reset on every call, so the pattern
            // was always rebuilt anyway; it is now simply a local value.
            $wordPattern = \substr(
                \chunk_split($word, 1, $this->_spacing_regex),
                0,
                -\strlen($this->_spacing_regex)
            );

            // We only want to do this when it is followed by a non-word character.
            // And if there are no char at the start of the string.
            //
            // That way valid stuff like "dealer to!" does not become "dealerto".

            $str = (string) \preg_replace_callback(
                '#(?<before>[^\p{L}]|^)(?<word>' . \str_replace(
                    ['#', '.'],
                    ['\#', '\.'],
                    $wordPattern
                ) . ')(?<after>[^\p{L}@.!? ]|$)#ius',
                function ($matches) {
                    return $this->_compact_exploded_words_callback($matches);
                },
                $str
            );
        }

        return $str;
    }
500
501
    /**
     * Callback for "_compact_exploded_javascript()".
     *
     * <p>
     * <br />
     * INFO: Removes everything that matches the spacing-regex from the matched
     * word (e.g. 'j a v a s c r i p t') and puts the untouched "before" and
     * "after" parts back around it.
     * </p>
     *
     * @param string[] $matches <p>Needs the keys "before", "word" and "after".</p>
     *
     * @return  string
     */
    private function _compact_exploded_words_callback($matches)
    {
        $compactedWord = \preg_replace(
            '/' . $this->_spacing_regex . '/ius',
            '',
            $matches['word']
        );

        return $matches['before'] . $compactedWord . $matches['after'];
    }
521
522
    /**
     * HTML-Entity decode callback.
     *
     * <p>
     * If quoted "key=value" parts can be pulled out of the match, each value is
     * cleaned on its own with a cloned instance and only replaced when that clone
     * reports XSS. Otherwise the whole match is url-decoded and entity-decoded.
     * In both cases "&lt;" / "&gt;" are swapped with placeholders around the
     * decoding step, so that they are still encoded afterwards.
     * </p>
     *
     * @param string[] $match
     *
     * @return string
     */
    private function _decode_entity(array $match)
    {
        // init
        $str = $match[0];
        $needProtection = true;

        // protect GET variables without XSS in URLs
        if (\strpos($str, '=') !== false) {
            $remaining = $str;
            $attributeMatches = [];

            while (\preg_match("/[?|&]?[\p{L}0-9_\-\[\]]+\s*=\s*([\"'])(?<attr>[^\1]*?)\\1/u", $remaining, $found)) {
                $attributeMatches[] = $found;
                $remaining = \str_replace($found[0], '', $remaining);

                // stop as soon as there can't be another quoted value left
                if (\substr_count($remaining, '"') <= 1 && \substr_count($remaining, '\'') <= 1) {
                    break;
                }
            }

            // nothing was cut out -> fall back to decoding the whole string
            $needProtection = ($remaining === $str);

            if (!$needProtection) {
                foreach ($attributeMatches as $attributeMatch) {
                    if (!isset($attributeMatch['attr'])) {
                        continue;
                    }

                    $tmpAntiXss = clone $this;
                    $urlPartClean = $tmpAntiXss->xss_clean($attributeMatch['attr']);

                    if ($tmpAntiXss->isXssFound() !== true) {
                        continue;
                    }

                    $this->_xss_found = true;

                    $urlPartClean = \str_replace(['&lt;', '&gt;'], [self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], $urlPartClean);
                    $urlPartClean = UTF8::rawurldecode($urlPartClean);
                    $urlPartClean = \str_replace([self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], ['&lt;', '&gt;'], $urlPartClean);

                    $str = \str_ireplace($attributeMatch['attr'], $urlPartClean, $str);
                }
            }
        }

        if ($needProtection) {
            $str = \str_replace(['&lt;', '&gt;'], [self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], $str);
            $str = $this->_entity_decode(UTF8::rawurldecode($str));
            $str = \str_replace([self::VOKU_ANTI_XSS_LT, self::VOKU_ANTI_XSS_GT], ['&lt;', '&gt;'], $str);
        }

        return $str;
    }
580
581
    /**
     * Decode the string: everything from the first html-tag on goes through
     * "_decode_entity()", a string without any html-tag goes through "UTF8::rawurldecode()".
     *
     * @param string $str
     *
     * @return string
     */
    private function _decode_string(string $str)
    {
        // init
        $regExForHtmlTags = '/<\p{L}+.*+/us';

        $containsHtmlTag = \strpos($str, '<') !== false
                           &&
                           \preg_match($regExForHtmlTags, $str) === 1;

        if (!$containsHtmlTag) {
            return UTF8::rawurldecode($str);
        }

        return (string) \preg_replace_callback(
            $regExForHtmlTags,
            function ($matches) {
                return $this->_decode_entity($matches);
            },
            $str
        );
    }
611
612
    /**
     * Run the complete clean-up pipeline on one value and keep track of
     * whether anything was changed by it ("$this->_xss_found").
     *
     * <p>
     * Empty values, "0" and strings that are just an int or a float are
     * returned as they are, without running the pipeline.
     * </p>
     *
     * @param string $str <p>Is cast to string first.</p>
     *
     * @return string
     */
    private function _do($str)
    {
        $str = (string) $str;

        $nothingToClean = !$str
                          ||
                          (string) (int) $str === $str
                          ||
                          (string) (float) $str === $str;

        if ($nothingToClean) {
            // no xss found (but never overwrite an earlier positive result)
            if ($this->_xss_found !== true) {
                $this->_xss_found = false;
            }

            return $str;
        }

        // remove the BOM from UTF-8 / UTF-16 / UTF-32 strings
        $str = UTF8::remove_bom($str);

        // replace the diamond question mark (�) and invalid-UTF8 chars
        $str = UTF8::replace_diamond_question_mark($str, '');

        // replace invisible characters with one single space
        $str = UTF8::remove_invisible_characters($str, true, ' ');

        // normalize the whitespace
        $str = UTF8::normalize_whitespace($str);

        // decode UTF-7 characters
        $str = $this->_repack_utf7($str);

        // decode the string
        $str = $this->_decode_string($str);

        // remove all >= 4-Byte chars if needed
        if ($this->_stripe_4byte_chars) {
            $str = (string) \preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $str);
        }

        // snapshot of the decoded string (for the later comparison)
        $str_backup = $str;

        // correct words before the browser will do it
        $str = $this->_compact_exploded_javascript($str);

        // remove disallowed javascript calls in links, images etc.
        $str = $this->_remove_disallowed_javascript($str);

        // remove strings that are never allowed
        $str = $this->_do_never_allowed($str);

        // remove evil attributes such as style, onclick and xmlns
        $str = $this->_remove_evil_attributes($str);

        // sanitize naughty JavaScript elements
        $str = $this->_sanitize_naughty_javascript($str);

        // sanitize naughty HTML elements
        $str = $this->_sanitize_naughty_html($str);

        // final clean up
        //
        // -> This adds a bit of extra precaution in case something got through the above filters.
        $str = $this->_do_never_allowed_afterwards($str);

        // check for xss: any difference to the snapshot counts as a finding
        if ($this->_xss_found !== true) {
            $this->_xss_found = $str_backup !== $str;
        }

        return $str;
    }
694
695
    /**
     * Remove never allowed strings.
     *
     * <p>
     * Three passes: (1) the plain string map "$this->_never_allowed_str",
     * (2) the never allowed call statements (e.g. "javascript:"), but only
     * those that occur in the string at all, (3) the never allowed regexes.
     * </p>
     *
     * @param string $str
     *
     * @return string
     */
    private function _do_never_allowed(string $str)
    {
        // INFO: the former static key-cache was set to null right before it was
        // checked for null, so the keys were always recomputed; read them directly.
        $str = \str_ireplace(
            \array_keys($this->_never_allowed_str),
            $this->_never_allowed_str,
            $str
        );

        // ---

        $replaceNeverAllowedCall = [];
        foreach (self::$_never_allowed_call as $call) {
            if (\stripos($str, $call) !== false) {
                $replaceNeverAllowedCall[] = $call;
            }
        }
        if ($replaceNeverAllowedCall !== []) {
            // "${1}" instead of "$1": a replacement that starts with a digit
            // would otherwise be read as part of the group number (e.g. "$10")
            $str = (string) \preg_replace(
                '#([^\p{L}]|^)(?:' . \implode('|', $replaceNeverAllowedCall) . ')\s*:(?:.*?([/\\\;()\'">]|$))#ius',
                '${1}' . $this->_replacement . '${2}',
                $str
            );
        }

        // ---

        $regex_combined = [];
        foreach ($this->_never_allowed_regex as $regex => $replacement) {
            // regexes with the default replacement are collected and applied in one go below
            if ($replacement === $this->_replacement) {
                $regex_combined[] = $regex;

                continue;
            }

            $str = (string) \preg_replace(
                '#' . $regex . '#iUus',
                $replacement,
                $str
            );
        }

        if (!$this->_cache_never_allowed_regex_string || $regex_combined !== []) {
            $this->_cache_never_allowed_regex_string = \implode('|', $regex_combined);
        }

        if ($this->_cache_never_allowed_regex_string) {
            $str = (string) \preg_replace(
                '#' . $this->_cache_never_allowed_regex_string . '#ius',
                $this->_replacement,
                $str
            );
        }

        return $str;
    }
765
766
    /**
     * Remove never allowed string, afterwards.
     *
     * <p>
     * <br />
     * INFO: clean-up also some string, if there is no html-tag
     * </p>
     *
     * <p>
     * First every on*-event name from "$this->_never_allowed_on_events_afterwards"
     * that occurs in the string is replaced (repeated until nothing matches anymore),
     * then the strings from "$this->_never_allowed_str_afterwards" are replaced.
     * </p>
     *
     * @param string $str
     *
     * @return  string
     */
    private function _do_never_allowed_afterwards(string $str)
    {
        // cheap pre-check: without "on" there can't be any on*-event in the string
        if (\stripos($str, 'on') !== false) {
            foreach ($this->_never_allowed_on_events_afterwards as $event) {
                if (\stripos($str, $event) === false) {
                    continue;
                }

                $regex = '(?<before>[^\p{L}]|^)(?:' . $event . ')(?<after>\(.*?\)|.*?>|(?:\s|\[.*?\])*?=(?:\s|\[.*?\])*?|(?:\s|\[.*?\])*?&equals;(?:\s|\[.*?\])*?|[^\p{L}]*?=[^\p{L}]*?|[^\p{L}]*?&equals;[^\p{L}]*?|$|\s*?>*?$)';

                // repeat, because a replacement can uncover a new match
                do {
                    $replaceCount = 0;

                    // "${1}" instead of "$1": a replacement that starts with a digit
                    // would otherwise be read as part of the group number (e.g. "$10")
                    $str = (string) \preg_replace(
                        '#' . $regex . '#ius',
                        '${1}' . $this->_replacement . '${2}',
                        $str,
                        -1,
                        $replaceCount
                    );
                } while ($replaceCount > 0);
            }
        }

        return (string) \str_ireplace(
            $this->_never_allowed_str_afterwards,
            $this->_replacement,
            $str
        );
    }
807
808
    /**
     * Entity-decoding.
     *
     * <p>
     * Decodes via "UTF8::html_entity_decode()" and, if there are still named
     * entities left afterwards, replaces the known ones via a lookup table
     * that is built once per request and kept in a static variable.
     * </p>
     *
     * @param string $str
     *
     * @return string
     */
    private function _entity_decode(string $str)
    {
        static $HTML_ENTITIES_CACHE;

        $flags = ENT_QUOTES | ENT_HTML5 | ENT_DISALLOWED | ENT_SUBSTITUTE;

        // decode
        $str = UTF8::html_entity_decode($str, $flags);

        // decode-again, for e.g. HHVM or misconfigured applications ...
        if (
            \strpos($str, '&') !== false
            &&
            \preg_match_all('/(?<html_entity>&[A-Za-z]{2,}[;]{0})/', $str, $matches)
        ) {
            if ($HTML_ENTITIES_CACHE === null) {

                // links:
                // - http://dev.w3.org/html5/html-author/charref
                // - http://www.w3schools.com/charsets/ref_html_entities_n.asp
                $entitiesSecurity = [
                    '&#x00000;'          => '',
                    '&#0;'               => '',
                    '&#x00001;'          => '',
                    '&#1;'               => '',
                    '&nvgt;'             => '',
                    '&#61253;'           => '',
                    '&#x0EF45;'          => '',
                    '&shy;'              => '',
                    '&#x000AD;'          => '',
                    '&#173;'             => '',
                    '&colon;'            => ':',
                    '&#x0003A;'          => ':',
                    '&#58;'              => ':',
                    '&lpar;'             => '(',
                    '&#x00028;'          => '(',
                    '&#40;'              => '(',
                    '&rpar;'             => ')',
                    '&#x00029;'          => ')',
                    '&#41;'              => ')',
                    '&quest;'            => '?',
                    '&#x0003F;'          => '?',
                    '&#63;'              => '?',
                    '&sol;'              => '/',
                    '&#x0002F;'          => '/',
                    '&#47;'              => '/',
                    '&apos;'             => '\'',
                    '&#x00027;'          => '\'',
                    '&#039;'             => '\'',
                    '&#39;'              => '\'',
                    '&#x27;'             => '\'',
                    // "&bsol;" is the backslash, like the two numeric forms below
                    // (it was mapped to an apostrophe before)
                    '&bsol;'             => '\\',
                    '&#x0005C;'          => '\\',
                    '&#92;'              => '\\',
                    '&comma;'            => ',',
                    '&#x0002C;'          => ',',
                    '&#44;'              => ',',
                    '&period;'           => '.',
                    '&#x0002E;'          => '.',
                    '&quot;'             => '"',
                    '&QUOT;'             => '"',
                    '&#x00022;'          => '"',
                    '&#34;'              => '"',
                    '&grave;'            => '`',
                    '&DiacriticalGrave;' => '`',
                    '&#x00060;'          => '`',
                    '&#96;'              => '`',
                    '&#46;'              => '.',
                    '&equals;'           => '=',
                    '&#x0003D;'          => '=',
                    '&#61;'              => '=',
                    '&newline;'          => "\n",
                    '&#x0000A;'          => "\n",
                    '&#10;'              => "\n",
                    '&tab;'              => "\t",
                    '&#x00009;'          => "\t",
                    '&#9;'               => "\t",
                ];

                // INFO: with string keys, later arrays win in "array_merge()", so the
                // translation table and the fallback data can override the list above
                $HTML_ENTITIES_CACHE = \array_merge(
                    $entitiesSecurity,
                    \array_flip(\get_html_translation_table(HTML_ENTITIES, $flags)),
                    \array_flip(self::_get_data('entities_fallback'))
                );
            }

            $search = [];
            $replace = [];
            foreach ($matches['html_entity'] as $match) {
                // the regex captures the entity without the ";", the lookup table needs it
                $match .= ';';
                if (isset($HTML_ENTITIES_CACHE[$match])) {
                    $search[$match] = $match;
                    $replace[$match] = $HTML_ENTITIES_CACHE[$match];
                }
            }

            if ($replace !== []) {
                $str = \str_ireplace($search, $replace, $str);
            }
        }

        return $str;
    }
918
919
    /**
     * Filters tag attributes for consistency and safety.
     *
     * <p>
     * A string without "=" is returned unchanged. Otherwise only the parts that
     * look like a quoted 'name="value"' (or name='value') pair are kept and
     * returned, glued together in the order they were found.
     * </p>
     *
     * @param string $str
     *
     * @return string
     */
    private function _filter_attributes(string $str)
    {
        if ($str === '') {
            return '';
        }

        if (\strpos($str, '=') === false) {
            return $str;
        }

        $keptAttributes = [];
        while (\preg_match('#\s*[\p{L}0-9_\-\[\]]+\s*=\s*(["\'])(?:[^\1]*?)\\1#u', $str, $found)) {
            $keptAttributes[] = $found[0];
            $str = \str_replace($found[0], '', $str);

            // stop as soon as there can't be another quoted value left
            if (\substr_count($str, '"') <= 1 && \substr_count($str, '\'') <= 1) {
                break;
            }
        }

        return \implode('', $keptAttributes);
    }
949
950
    /**
     * Includes "/data/<file>.php" (relative to this class' directory) and
     * hands back whatever that file returns.
     *
     * @param string $file <p>File name without the ".php" extension.</p>
     *
     * @return mixed
     */
    private static function _get_data(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
962
963
    /**
     * (Re-)builds "$this->_never_allowed_str".
     *
     * <p>
     * <br />
     * The map goes from a forbidden substring to its substitute: scripting related
     * strings are mapped to the configured replacement, markup openers / closers
     * are mapped to an entity-encoded variant.
     * </p>
     *
     * @return void
     */
    private function _initNeverAllowedStr()
    {
        // strings that are swapped for "$this->_replacement"
        $replaced = \array_fill_keys(
            [
                'document.cookie',
                '(document).cookie',
                'document.write',
                '(document).write',
                '.parentNode',
                '.innerHTML',
                '.appendChild',
                '-moz-binding',
            ],
            $this->_replacement
        );

        // strings whose angle bracket is entity-encoded instead
        $encoded = [
            '<?'        => '&lt;?',
            '?>'        => '?&gt;',
            '<![CDATA[' => '&lt;![CDATA[',
            '<!ENTITY'  => '&lt;!ENTITY',
            '<!DOCTYPE' => '&lt;!DOCTYPE',
            '<!ATTLIST' => '&lt;!ATTLIST',
        ];

        // the key sets are disjoint, so the union keeps all entries in the listed order
        $this->_never_allowed_str = $replaced + $encoded;
    }
987
988
    /**
     * (Re-)builds "$this->_never_allowed_regex".
     *
     * <p>
     * <br />
     * The map goes from a regex fragment (without delimiters) to its substitute.
     * </p>
     *
     * <p>
     * <br />
     * NOTE(review): the "data-attribute + base64" fragment is a double-quoted PHP string,
     * so every "\1" in it is an octal escape (the byte 0x01) and not a PCRE back-reference
     * to the opening quote - presumably unintended, confirm before changing. The literal
     * is kept exactly as it was.
     * </p>
     *
     * @return void
     */
    private function _initNeverAllowedRegex()
    {
        // fragments that are swapped for "$this->_replacement"
        $replaced = \array_fill_keys(
            [
                // default javascript
                '(\(?:?document\)?|\(?:?window\)?(?:\.document)?)\.(?:location|on\w*)',
                // data-attribute + base64
                "([\"'])?data\s*:\s*(?!image\s*\/\s*(?!svg.*?))[^\1]*?base64[^\1]*?,[^\1]*?\1?",
                // old IE, old Netscape
                'expression\s*(?:\(|&\#40;)',
                // src="js"
                'src\=(?<wrapper>[\'|"]).*\.js(?:\g{wrapper})',
            ],
            $this->_replacement
        );

        // comments: the delimiters are entity-encoded, the content is kept
        $encoded = [
            '<!--(.*)-->' => '&lt;!--$1--&gt;',
            '<!--'        => '&lt;!--',
        ];

        // the key sets are disjoint, so the union keeps all entries in the listed order
        $this->_never_allowed_regex = $replaced + $encoded;
    }
1009
1010
    /**
     * Regex callback that sanitizes the "href" attribute of a matched link tag.
     *
     * <p>
     * <br />
     * Thin wrapper: all the work is done by _js_removal_callback().
     * </p>
     *
     * @param string[] $match
     *
     * @return string
     */
    private function _js_link_removal_callback(array $match)
    {
        $attributeName = 'href';

        return $this->_js_removal_callback($match, $attributeName);
    }
1028
1029
    /**
     * Regex callback that sanitizes one attribute of a matched tag.
     *
     * <p>
     * <br />
     * The attribute value is swapped for the configured replacement when either a
     * nested xss_clean() run on the quoted value reports XSS, or the attribute is
     * followed by one of the never-allowed calls, a window / history / location /
     * document / cookie / ScriptElement reference or a "data:" prefix.
     * </p>
     *
     * @param string[] $match  <p>Regex match: [0] is the whole match, [1] the captured attribute part.</p>
     * @param string   $search <p>Name of the attribute to inspect, e.g. "href" or "src".</p>
     *
     * @return string
     */
    private function _js_removal_callback(array $match, string $search)
    {
        if (!$match[0]) {
            return '';
        }

        // hack for style attributes v1: park a double-quoted style attribute of a link
        // behind a placeholder while the tag is processed
        $parkedStyle = null;
        if (
            $search === 'href'
            &&
            \stripos($match[0], 'style') !== false
        ) {
            $styleMatch = [];
            if (\preg_match('/style=".*?"/ius', $match[0], $styleMatch) === 1) {
                $parkedStyle = $styleMatch[0];
                $match[0] = \str_ireplace($parkedStyle, self::VOKU_ANTI_XSS_STYLE, $match[0]);
            }
        }

        $attributes = $this->_filter_attributes(\str_replace(['<', '>'], '', $match[1]));

        // what a rejected attribute is turned into
        $neutralized = $search . '="' . $this->_replacement . '"';

        // filter for "$search"-attributes
        if (
            \strpos($match[1], '=') !== false
            &&
            \stripos($attributes, $search) !== false
        ) {
            $quotedValuePattern = '#' . $search . '=(?<wrapper>[\'|"]).*(?:\g{wrapper})#isU';
            $quotedValue = [];
            $flaggedByNestedRun = false;

            if (\preg_match($quotedValuePattern, $match[1], $quotedValue) === 1) {
                // run the quoted value through a throw-away copy, only its verdict is of interest
                $nested = clone $this;

                /** @noinspection UnusedFunctionResultInspection */
                $nested->xss_clean($quotedValue[0]);

                if ($nested->isXssFound() === true) {
                    $flaggedByNestedRun = true;
                    $this->_xss_found = true;

                    $attributes = (string) \preg_replace($quotedValuePattern, $neutralized, $attributes);
                }
            }

            if (!$flaggedByNestedRun) {
                // filter for javascript: only the calls that occur in the match are added to the pattern
                $callAlternatives = '';
                foreach (self::$_never_allowed_call as $call) {
                    if (\stripos($match[0], $call) !== false) {
                        $callAlternatives .= $call . ':|';
                    }
                }

                $scriptingPattern = '#' . $search . '=.*(?:' . $callAlternatives . '\(?window\)?\.|\(?history\)?\.|\(?location\)?\.|\(?document\)?\.|\(?cookie\)?\.|\(?ScriptElement\)?\.|d\s*a\s*t\s*a\s*:)#ius';
                if (\preg_match($scriptingPattern, $match[1]) === 1) {
                    $attributes = (string) \preg_replace($scriptingPattern, $neutralized, $attributes);
                }
            }
        }

        $result = \str_ireplace($match[1], $attributes, (string) $match[0]);

        // hack for style attributes v2: put the parked style attribute back
        if ($parkedStyle !== null) {
            $result = \str_replace(self::VOKU_ANTI_XSS_STYLE, $parkedStyle, $result);
        }

        return $result;
    }
1133
1134
    /**
     * Regex callback that sanitizes the "src" attribute of a matched tag.
     *
     * <p>
     * <br />
     * Thin wrapper: all the work is done by _js_removal_callback().
     * </p>
     *
     * @param string[] $match
     *
     * @return string
     */
    private function _js_src_removal_callback(array $match)
    {
        $attributeName = 'src';

        return $this->_js_removal_callback($match, $attributeName);
    }
1152
1153
    /**
     * Remove disallowed Javascript in links, media tags and script tags.
     *
     * <p>
     * <br />
     * Each pass is guarded by a cheap stripos() check, so the regex only runs when the
     * tag name occurs in the string at all. All passes are repeated until a full round
     * leaves the string unchanged.
     * </p>
     *
     * <p>
     * <br />
     * Note: the patterns accept any run of characters that are neither a letter nor "@"
     * as the separator between the tag name and its attributes, not only whitespace.
     * </p>
     *
     * <p>
     * <br />
     * NOTE(review): the base64 image pattern is a double-quoted PHP string, so "\1" in it
     * is an octal escape (byte 0x01), not a PCRE back-reference - confirm before changing.
     * </p>
     *
     * @param string $str
     *
     * @return string
     */
    private function _remove_disallowed_javascript($str)
    {
        // tags whose "src" attribute is checked without any special casing; they share one pattern shape
        $srcOnlyTags = ['audio', 'video', 'source'];

        $sanitizeLink = function ($matches) {
            return $this->_js_link_removal_callback($matches);
        };

        $sanitizeSrc = function ($matches) {
            return $this->_js_src_removal_callback($matches);
        };

        $sanitizeImage = function ($matches) {
            // base64 encoded "data:image/..." sources are kept as they are
            if (
                \strpos($matches[1], 'base64') !== false
                &&
                \preg_match("/([\"'])?data\s*:\s*(?:image\s*\/.*)[^\1]*base64[^\1]*,[^\1]*\1?/iUus", $matches[1])
            ) {
                return $matches[0];
            }

            return $this->_js_src_removal_callback($matches);
        };

        do {
            $original = $str;

            if (\stripos($str, '<a') !== false) {
                $str = (string) \preg_replace_callback(
                    '#<a[^\p{L}@>]+([^>]*?)(?:>|$)#iu',
                    $sanitizeLink,
                    $str
                );
            }

            if (\stripos($str, '<img') !== false) {
                $str = (string) \preg_replace_callback(
                    '#<img[^\p{L}@]+([^>]*?)(?:\s?/?>|$)#iu',
                    $sanitizeImage,
                    $str
                );
            }

            foreach ($srcOnlyTags as $tagName) {
                if (\stripos($str, '<' . $tagName) !== false) {
                    $str = (string) \preg_replace_callback(
                        '#<' . $tagName . '[^\p{L}@]+([^>]*?)(?:\s?/?>|$)#iu',
                        $sanitizeSrc,
                        $str
                    );
                }
            }

            if (\stripos($str, 'script') !== false) {
                // INFO: US-ASCII: ¼ === <
                $str = (string) \preg_replace(
                    '#(?:%3C|¼|<)\s*script[^\p{L}@]+(?:[^>]*)(?:\s?/?(?:%3E|¾|>)|$)#iu',
                    $this->_replacement,
                    $str
                );
            }

            // second, more tolerant script pass (re-checked, the first pass may have removed everything)
            if (\stripos($str, 'script') !== false) {
                // INFO: US-ASCII: ¼ === <
                $str = (string) \preg_replace(
                    '#(?:%3C|¼|<)[^\p{L}@]*/*[^\p{L}@]*(?:script[^\p{L}@]+).*(?:%3E|¾|>)?#iUus',
                    $this->_replacement,
                    $str
                );
            }
        } while ($original !== $str);

        return (string) $str;
    }
1260
1261
    /**
     * Remove Evil HTML Attributes (like event handlers and style).
     *
     * It removes the evil attribute and either:
     *
     *  - Everything up until a space. For example, everything between the pipes:
     *
     * <code>
     *   <a |style=document.write('hello');alert('world');| class=link>
     * </code>
     *
     *  - Everything inside the quotes. For example, everything between the pipes:
     *
     * <code>
     *   <a |style="document.write('hello'); alert('world');"| class="link">
     * </code>
     *
     * Every pass is repeated until it no longer replaces anything.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return string the string with the evil attributes removed
     */
    private function _remove_evil_attributes($str)
    {
        // replace style-attribute, first (if needed)
        $styleIsEvil = \stripos($str, 'style') !== false
                       &&
                       \in_array('style', $this->_evil_attributes_regex, true);
        if ($styleIsEvil) {
            do {
                $replaced = 0;

                $str = (string) \preg_replace(
                    '/(<[^>]+)(?<!\p{L})(style\s*=\s*"(?:[^"]*?)"|style\s*=\s*\'(?:[^\']*?)\')/iu',
                    '$1' . $this->_replacement,
                    $str,
                    -1,
                    $replaced
                );
            } while ($replaced > 0);
        }

        // the alternation of all evil attribute names is built once and then cached
        if (!$this->_cache_evil_attributes_regex_string) {
            $this->_cache_evil_attributes_regex_string = \implode('|', $this->_evil_attributes_regex)
                                                         . '|'
                                                         . \implode('\w*|', $this->_never_allowed_on_events_afterwards);
        }

        $evilNames = $this->_cache_evil_attributes_regex_string;

        // illegal attributes with a quoted value (" or ')
        $quotedPattern = '/(.*)((?:<[^>]+)(?<!\p{L}))(?:' . $evilNames . ')(?:\s*=\s*)(?:\'(?:.*?)\'|"(?:.*?)")(.*)/ius';
        // illegal attributes with an unquoted value
        $unquotedPattern = '/(.*?)(<[^>]+)(?<!\p{L})(?:' . $evilNames . ')\s*=\s*(?:[^\s>]*)(.*?)/ius';

        do {
            $replacedQuoted = 0;
            $replacedUnquoted = 0;

            $str = (string) \preg_replace(
                $quotedPattern,
                '$1$2' . $this->_replacement . '$3$4',
                $str,
                -1,
                $replacedQuoted
            );

            $str = (string) \preg_replace(
                $unquotedPattern,
                '$1$2' . $this->_replacement . '$3',
                $str,
                -1,
                $replacedUnquoted
            );
        } while ($replacedQuoted + $replacedUnquoted > 0);

        return (string) $str;
    }
1334
1335
    /**
     * UTF-7 decoding function.
     *
     * <p>
     * <br />
     * Every "+<letters/digits>-" sequence is handed to _repack_utf7_callback();
     * a string without any "-" is returned as it is.
     * </p>
     *
     * @param string $str <p>HTML document for recode ASCII part of UTF-7 back to ASCII.</p>
     *
     * @return string
     */
    private function _repack_utf7(string $str)
    {
        // a UTF-7 sequence always ends with "-", so skip the regex when there is none
        $mayContainSequence = \strpos($str, '-') !== false;
        if (!$mayContainSequence) {
            return $str;
        }

        $decodeSequence = function ($matches) {
            return $this->_repack_utf7_callback($matches);
        };

        return (string) \preg_replace_callback('#\+([\p{L}0-9]+)-#iu', $decodeSequence, $str);
    }
1356
1357
    /**
     * Additional UTF-7 decoding function.
     *
     * <p>
     * <br />
     * The captured part is base64-decoded; if it is not valid base64, or does not
     * survive an encode round-trip, the whole match is returned unchanged. From the
     * decoded bytes every "\x00<char>" pair is reduced to "<char>".
     * </p>
     *
     * @param string[] $strings <p>Regex match: [0] is the whole sequence, [1] its base64 part.</p>
     *
     * @return string
     */
    private function _repack_utf7_callback(array $strings)
    {
        $decoded = \base64_decode($strings[1], true);

        // not base64 at all, or not the canonical encoding of the decoded bytes: leave the input alone
        if (
            $decoded === false
            ||
            \rtrim(\base64_encode($decoded), '=') !== \rtrim($strings[1], '=')
        ) {
            return $strings[0];
        }

        $reEncodeRest = function ($matches) {
            return $this->_repack_utf7_callback_back($matches);
        };

        $repacked = (string) \preg_replace_callback(
            '/^((?:\x00.)*?)((?:[^\x00].)+)/us',
            $reEncodeRest,
            $decoded
        );

        return (string) \preg_replace('/\x00(.)/us', '$1', $repacked);
    }
1390
1391
    /**
     * Additional UTF-7 encoding function.
     *
     * <p>
     * <br />
     * Regex callback: keeps the first capture as it is and re-encodes the second
     * capture as a "+<base64 without padding>-" sequence.
     * </p>
     *
     * @param string[] $str <p>Regex match: [1] is kept verbatim, [2] is base64-encoded again.</p>
     *
     * @return string
     */
    private function _repack_utf7_callback_back(array $str)
    {
        $encodedRest = \rtrim(\base64_encode($str[2]), '=');

        return $str[1] . '+' . $encodedRest . '-';
    }
1402
1403
    /**
     * Sanitize naughty HTML elements.
     *
     * <p>
     * <br />
     *
     * If a tag containing any of the words in the evil-tags list
     * is found, the tag gets converted to entities.
     *
     * <br /><br />
     *
     * So this: <blink>
     * <br />
     * Becomes: &lt;blink&gt;
     * </p>
     *
     * <p>
     * <br />
     * Afterwards every remaining tag is passed to _close_html_callback(), unless its
     * name is listed in "$this->_do_not_close_html_tags". Both steps are repeated until
     * the string stops changing.
     * </p>
     *
     * @param string $str
     *
     * @return string
     */
    private function _sanitize_naughty_html($str)
    {
        $encodeEvilTag = function ($matches) {
            return $this->_sanitize_naughty_html_callback($matches);
        };

        $closeTag = function ($matches) {
            $keepUntouched = $this->_do_not_close_html_tags !== []
                             &&
                             isset($matches['tagName'])
                             &&
                             \in_array($matches['tagName'], $this->_do_not_close_html_tags, true);

            return $keepUntouched ? $matches[0] : $this->_close_html_callback($matches);
        };

        // result of the previous round, used to detect a round that changed nothing new
        $previousResult = '';

        for (;;) {
            $before = $str;

            // without any angle bracket there is nothing to sanitize
            $hasAngleBracket = \strpos($str, '<') !== false || \strpos($str, '>') !== false;
            if (!$hasAngleBracket) {
                return $str;
            }

            // the alternation of all evil tag names is built once and then cached
            if (!$this->_cache__evil_html_tags_str) {
                $this->_cache__evil_html_tags_str = \implode('|', $this->_evil_html_tags);
            }

            $str = (string) \preg_replace_callback(
                '#<(?<start>/*\s*)(?<tagName>' . $this->_cache__evil_html_tags_str . ')(?<end>[^><]*)(?<rest>[><]*)#ius',
                $encodeEvilTag,
                $str
            );

            if (\strpos($str, '<') === false) {
                return $str;
            }

            // only a lonely "<" is left over from something that was flagged as XSS
            if ($this->_xss_found && \trim($str) === '<') {
                return '';
            }

            $str = (string) \preg_replace_callback(
                '#<(?!!--|!\[)((?<start>/*\s*)((?<tagName>[\p{L}:]+)(?=[^\p{L}]|$|)|.+)[^\s"\'\p{L}>/=]*[^>]*)(?<closeTag>>)?#iusS', // tags without comments
                $closeTag,
                $str
            );

            if ($str === $previousResult) {
                return (string) $str;
            }
            $previousResult = $str;

            if ($before === $str) {
                break;
            }
        }

        return (string) $str;
    }
1490
1491
    /**
     * Regex callback that normalizes one tag-like match.
     *
     * <p>
     * <br />
     * Angle brackets inside the match are always entity-encoded. A match that has a
     * closing ">" is wrapped in "<" and ">" again; a match without one loses its
     * opening "<" to "&lt;", unless it only consists of digits, punctuation and
     * currency signs.
     * </p>
     *
     * @param string[] $matches <p>Regex match: [1] is the content after "<", "closeTag" the optional ">".</p>
     *
     * @return string
     */
    private function _close_html_callback(array $matches)
    {
        $inner = \str_replace(['>', '<'], ['&gt;', '&lt;'], $matches[1]);

        if (!empty($matches['closeTag'])) {
            return '<' . $inner . '>';
        }

        // allow e.g. "< $2.20"
        $isPlainAmount = \preg_match('/^[ .,\d=%€$₢₣£₤₶ℳ₥₦₧₨රුரூ௹रू₹૱₩₪₸₫֏₭₺₼₮₯₰₷₱﷼₲₾₳₴₽₵₡¢¥円৳元៛₠¤฿؋]*$/u', $matches[1]);

        return ($isPlainAmount ? '<' : '&lt;') . $inner;
    }
1509
1510
    /**
     * Sanitize naughty HTML.
     *
     * <p>
     * <br />
     * Regex callback of _sanitize_naughty_html(): entity-encodes the opening "<" of an
     * evil tag and every ">" in the captured bracket run. Matches where the evil name is
     * only the beginning of a longer word are returned unchanged.
     * </p>
     *
     * @param string[] $matches <p>Regex match with the named groups "start", "tagName", "end" and "rest".</p>
     *
     * @return string
     */
    private function _sanitize_naughty_html_callback(array $matches)
    {
        $fullMatch = $matches[0];
        $tagName = $matches['tagName'];

        // skip some edge-cases: no attribute / separator character at all,
        // and not the bare tag itself in one of its spellings
        if (
            \strpbrk($fullMatch, '= :/\\') === false
            &&
            \stripos($fullMatch, '<' . $tagName . '>') !== 0
            &&
            \stripos($fullMatch, '</' . $tagName . '>') !== 0
            &&
            \stripos($fullMatch, '<' . $tagName . '<') !== 0
        ) {
            return $fullMatch;
        }

        // skip tags whose name merely starts with the evil name
        if (\preg_match('/<[\/]?' . $tagName . '\p{L}+>/ius', $fullMatch) === 1) {
            return $fullMatch;
        }

        // encode captured opening or closing brace to prevent recursive vectors
        $encodedRest = \str_replace('>', '&gt;', $matches['rest']);

        // encode opening brace
        return '&lt;' . $matches['start'] . $tagName . $matches['end'] . $encodedRest;
    }
1564
1565
    /**
     * Sanitize naughty scripting elements
     *
     * <p>
     * <br />
     *
     * Looks for calls of disallowed PHP and JavaScript commands.
     * Rather than removing the code, it simply converts the
     * parenthesis of the call to entities.
     *
     * <br /><br />
     *
     * For example:  <pre>eval('some code')</pre>
     * <br />
     * Becomes:      <pre>eval&#40;'some code'&#41;</pre>
     * </p>
     *
     * <p>
     * <br />
     * The command names are matched case-insensitively, by the cheap pre-check as
     * well as by the regex itself.
     * </p>
     *
     * @param string $str
     *
     * @return string
     */
    private function _sanitize_naughty_javascript($str)
    {
        // no call without an opening parenthesis
        if (\strpos($str, '(') === false) {
            return (string) $str;
        }

        $patterns = [
            'alert',
            'prompt',
            'confirm',
            'cmd',
            'passthru',
            'eval',
            'exec',
            'execScript',
            'setTimeout',
            'setInterval',
            'setImmediate',
            'expression',
            'system',
            'fopen',
            'fsockopen',
            'file',
            'file_get_contents',
            'readfile',
            'unlink',
        ];

        // cheap pre-check before the regex is run; it has to be case-insensitive like the
        // regex below ("i" flag), otherwise e.g. "EVAL(" would never reach the regex
        $found = false;
        foreach ($patterns as $pattern) {
            if (\stripos($str, $pattern) !== false) {
                $found = true;

                break;
            }
        }

        if ($found === true) {
            $str = (string) \preg_replace(
                '#(' . \implode('|', $patterns) . ')(\s*)\((.*)\)#uisU',
                '\\1\\2&#40;\\3&#41;',
                $str
            );
        }

        return (string) $str;
    }
1633
1634
    /**
     * Add some strings to the "_evil_attributes"-array.
     *
     * <p>
     * <br />
     * The new entries are merged in front of the existing ones and the cached
     * attribute regex is dropped, so that it gets rebuilt on next use.
     * </p>
     *
     * @param string[] $strings
     *
     * @return $this
     */
    public function addEvilAttributes(array $strings): self
    {
        if ($strings !== []) {
            // reset
            $this->_cache_evil_attributes_regex_string = '';

            $this->_evil_attributes_regex = \array_merge($strings, $this->_evil_attributes_regex);
        }

        return $this;
    }
1657
1658
    /**
     * Add some strings to the "_evil_html_tags"-array.
     *
     * <p>
     * <br />
     * The new entries are merged in front of the existing ones and the cached
     * tag regex is dropped, so that it gets rebuilt on next use.
     * </p>
     *
     * @param string[] $strings
     *
     * @return $this
     */
    public function addEvilHtmlTags(array $strings): self
    {
        if ($strings !== []) {
            // reset
            $this->_cache__evil_html_tags_str = '';

            $this->_evil_html_tags = \array_merge($strings, $this->_evil_html_tags);
        }

        return $this;
    }
1681
1682
    /**
     * Add some strings to the "_never_allowed_regex"-array.
     *
     * <p>
     * <br />
     * The new entries are merged in front of the existing ones and the cached
     * regex string is reset.
     * </p>
     *
     * @param string[] $strings
     *
     * @return $this
     */
    public function addNeverAllowedRegex(array $strings): self
    {
        if ($strings !== []) {
            // reset
            $this->_cache_never_allowed_regex_string = '';

            $this->_never_allowed_regex = \array_merge($strings, $this->_never_allowed_regex);
        }

        return $this;
    }
1705
1706
    /**
     * Remove some strings from the "_never_allowed_regex"-array.
     *
     * <p>
     * <br />
     * WARNING: Use this method only if you have a really good reason.
     * </p>
     *
     * <p>
     * <br />
     * INFO: The given strings are compared against the values of the array;
     * the keys of the remaining entries are preserved.
     * </p>
     *
     * @param string[] $strings
     *
     * @return $this
     */
    public function removeNeverAllowedRegex(array $strings): self
    {
        if ($strings !== []) {
            // reset
            $this->_cache_never_allowed_regex_string = '';

            $toRemove = \array_intersect($strings, $this->_never_allowed_regex);
            $this->_never_allowed_regex = \array_diff($this->_never_allowed_regex, $toRemove);
        }

        return $this;
    }
1734
1735
    /**
1736
     * Add some strings to the "_never_allowed_on_events_afterwards"-array.
1737
     *
1738
     * @param string[] $strings
1739
     *
1740
     * @return $this
1741
     */
1742 1 View Code Duplication
    public function addNeverAllowedOnEventsAfterwards(array $strings): self
    {
        // Only touch the internal state when there is something to add.
        if ($strings !== []) {
            // clear the cached evil-attributes regex string
            $this->_cache_evil_attributes_regex_string = '';

            // the new entries are placed in front of the existing ones
            $merged = \array_merge($strings, $this->_never_allowed_on_events_afterwards);
            $this->_never_allowed_on_events_afterwards = $merged;
        }

        return $this;
    }
1758
1759
    /**
1760
     * Add some strings to the "_never_allowed_str_afterwards"-array.
1761
     *
1762
     * @param string[] $strings
1763
     *
1764
     * @return $this
1765
     */
1766 1
    public function addNeverAllowedStrAfterwards(array $strings): self
    {
        // Only touch the internal state when there is something to add.
        if ($strings !== []) {
            // the new entries are placed in front of the existing ones
            $merged = \array_merge($strings, $this->_never_allowed_str_afterwards);
            $this->_never_allowed_str_afterwards = $merged;
        }

        return $this;
    }
1779
1780
    /**
1781
     * Add some strings to the "_do_not_close_html_tags"-array.
1782
     *
1783
     * @param string[] $strings
1784
     *
1785
     * @return $this
1786
     */
1787 1
    public function addDoNotCloseHtmlTags(array $strings): self
    {
        // Only touch the internal state when there is something to add.
        if ($strings !== []) {
            // the new entries are placed in front of the existing ones
            $merged = \array_merge($strings, $this->_do_not_close_html_tags);
            $this->_do_not_close_html_tags = $merged;
        }

        return $this;
    }
1800
1801
    /**
1802
     * Remove some strings from the "_do_not_close_html_tags"-array.
1803
     *
1804
     * <p>
1805
     * <br />
1806
     * WARNING: Use this method only if you have a really good reason.
1807
     * </p>
1808
     *
1809
     * @param string[] $strings
1810
     *
1811
     * @return $this
1812
     */
1813 View Code Duplication
    public function removeDoNotCloseHtmlTags(array $strings): self
    {
        // Only touch the internal state when there is something to remove.
        if ($strings !== []) {
            // only the given entries that are currently registered are removed
            $toRemove = \array_intersect($strings, $this->_do_not_close_html_tags);
            $this->_do_not_close_html_tags = \array_diff($this->_do_not_close_html_tags, $toRemove);
        }

        return $this;
    }
1826
1827
    /**
1828
     * Check if the "AntiXSS->xss_clean()"-method found an XSS attack in the last run.
1829
     *
1830
     * @return bool|null will return null if the "xss_clean()" wasn't running at all
1831
     */
1832 64
    public function isXssFound()
    {
        // hand out the flag as it is currently stored (no further processing here)
        $xssFound = $this->_xss_found;

        return $xssFound;
    }
1836
1837
    /**
1838
     * Remove some strings from the "_evil_attributes"-array.
1839
     *
1840
     * <p>
1841
     * <br />
1842
     * WARNING: Use this method only if you have a really good reason.
1843
     * </p>
1844
     *
1845
     * @param string[] $strings
1846
     *
1847
     * @return $this
1848
     */
1849 2 View Code Duplication
    public function removeEvilAttributes(array $strings): self
    {
        // Only touch the internal state when there is something to remove.
        if ($strings !== []) {
            // clear the cached evil-attributes regex string, because the list changes
            $this->_cache_evil_attributes_regex_string = '';

            // only the given entries that are currently registered are removed
            $toRemove = \array_intersect($strings, $this->_evil_attributes_regex);
            $this->_evil_attributes_regex = \array_diff($this->_evil_attributes_regex, $toRemove);
        }

        return $this;
    }
1865
1866
    /**
1867
     * Remove some strings from the "_evil_html_tags"-array.
1868
     *
1869
     * <p>
1870
     * <br />
1871
     * WARNING: Use this method only if you have a really good reason.
1872
     * </p>
1873
     *
1874
     * @param string[] $strings
1875
     *
1876
     * @return $this
1877
     */
1878 2 View Code Duplication
    public function removeEvilHtmlTags(array $strings): self
    {
        // Only touch the internal state when there is something to remove.
        if ($strings !== []) {
            // clear the cached evil-html-tags string, because the list changes
            $this->_cache__evil_html_tags_str = '';

            // only the given entries that are currently registered are removed
            $toRemove = \array_intersect($strings, $this->_evil_html_tags);
            $this->_evil_html_tags = \array_diff($this->_evil_html_tags, $toRemove);
        }

        return $this;
    }
1894
1895
    /**
1896
     * Remove some strings from the "_never_allowed_on_events_afterwards"-array.
1897
     *
1898
     * <p>
1899
     * <br />
1900
     * WARNING: Use this method only if you have a really good reason.
1901
     * </p>
1902
     *
1903
     * @param string[] $strings
1904
     *
1905
     * @return $this
1906
     */
1907 1 View Code Duplication
    public function removeNeverAllowedOnEventsAfterwards(array $strings): self
    {
        // Only touch the internal state when there is something to remove.
        if ($strings !== []) {
            // clear the cached evil-attributes regex string
            $this->_cache_evil_attributes_regex_string = '';

            // only the given entries that are currently registered are removed
            $toRemove = \array_intersect($strings, $this->_never_allowed_on_events_afterwards);
            $this->_never_allowed_on_events_afterwards = \array_diff(
                $this->_never_allowed_on_events_afterwards,
                $toRemove
            );
        }

        return $this;
    }
1923
1924
    /**
1925
     * Remove some strings from the "_never_allowed_str_afterwards"-array.
1926
     *
1927
     * <p>
1928
     * <br />
1929
     * WARNING: Use this method only if you have a really good reason.
1930
     * </p>
1931
     *
1932
     * @param string[] $strings
1933
     *
1934
     * @return $this
1935
     */
1936 1 View Code Duplication
    public function removeNeverAllowedStrAfterwards(array $strings): self
    {
        // Only touch the internal state when there is something to remove.
        if ($strings !== []) {
            // only the given entries that are currently registered are removed
            $toRemove = \array_intersect($strings, $this->_never_allowed_str_afterwards);
            $this->_never_allowed_str_afterwards = \array_diff(
                $this->_never_allowed_str_afterwards,
                $toRemove
            );
        }

        return $this;
    }
1949
1950
    /**
1951
     * Set the replacement-string for not allowed strings.
1952
     *
1953
     * @param string $string
1954
     *
1955
     * @return $this
1956
     */
1957 51
    public function setReplacement($string): self
    {
        // the parameter is untyped, so force it into a string before storing it
        $replacement = (string) $string;
        $this->_replacement = $replacement;

        // Re-run both initializers after the replacement changed.
        // NOTE(review): presumably they rebuild the default lists with the new
        // replacement and thereby drop custom additions - confirm against their bodies.
        $this->_initNeverAllowedStr();
        $this->_initNeverAllowedRegex();

        return $this;
    }
1966
1967
    /**
1968
     * Set the option to strip 4-byte chars.
1969
     *
1970
     * <p>
1971
     * <br />
1972
     * INFO: use it if your DB (MySQL) can't use "utf8mb4" -> preventing stored XSS-attacks
1973
     * </p>
1974
     *
1975
     * @param bool $bool
1976
     *
1977
     * @return $this
1978
     */
1979 1
    public function setStripe4byteChars($bool): self
    {
        // the parameter is untyped, so force it into a real boolean before storing it
        $enabled = (bool) $bool;
        $this->_stripe_4byte_chars = $enabled;

        return $this;
    }
1985
1986
    /**
1987
     * XSS Clean
1988
     *
1989
     * <p>
1990
     * <br />
1991
     * Sanitizes data so that "Cross Site Scripting" hacks can be
1992
     * prevented. This method does a fair amount of work but
1993
     * it is extremely thorough, designed to prevent even the
1994
     * most obscure XSS attempts. But keep in mind that nothing
1995
     * is ever 100% foolproof...
1996
     * </p>
1997
     *
1998
     * <p>
1999
     * <br />
2000
     * <strong>Note:</strong> Should only be used to deal with data upon submission.
2001
     *   It's not something that should be used for general
2002
     *   runtime processing.
2003
     * </p>
2004
     *
2005
     * @see http://channel.bitflux.ch/wiki/XSS_Prevention
2006
     *    Based in part on some code and ideas from Bitflux.
2007
     * @see http://ha.ckers.org/xss.html
2008
     *    To help develop this script I used this great list of
2009
     *    vulnerabilities along with a few other hacks I've
2010
     *    harvested from examining vulnerabilities in other programs.
2011
     *
2012
     * @param array|mixed $str <p>input data e.g. string or array of strings</p>
2013
     *
2014
     * @return mixed
2015
     */
2016 99
    public function xss_clean($str)
    {
        // reset the result-flag of the previous run
        $this->_xss_found = null;

        // Arrays are cleaned element by element (recursively, keys are kept).
        if (\is_array($str)) {
            // Every recursive call resets "_xss_found", so without collecting the
            // results "isXssFound()" would only report the state of the last element.
            //
            // Aggregation: true if any element was flagged, otherwise the first
            // non-null result, otherwise null (e.g. for an empty array).
            $found_in_any = null;

            foreach ($str as $key => $value) {
                $str[$key] = $this->xss_clean($value);

                if ($this->_xss_found === true) {
                    $found_in_any = true;
                } elseif ($found_in_any === null) {
                    $found_in_any = $this->_xss_found;
                }
            }

            $this->_xss_found = $found_in_any;

            return $str;
        }

        // keep the untouched input, so it can be returned as-is when nothing was found
        $old_str_backup = $str;

        // process: repeat the cleaning until a pass does not change the value anymore
        do {
            $old_str = $str;
            $str = $this->_do($str);
        } while ($old_str !== $str);

        // keep the old value, if there wasn't any XSS attack
        if ($this->_xss_found !== true) {
            $str = $old_str_backup;
        }

        return $str;
    }
2045
}
2046