Completed
Push — master ( 9c631a...fcb278 )
by Maxime
03:34 queued 01:48
created

Security::xss_clean()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 132
Code Lines 36

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 24
CRAP Score 4.0218

Importance

Changes 0
Metric Value
cc 4
eloc 36
nc 5
nop 3
dl 0
loc 132
ccs 24
cts 27
cp 0.8889
crap 4.0218
rs 9.344
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * User: mfrancois
5
 * Date: 04/07/2016
6
 * Time: 11:32
7
 */
8
9
namespace Distilleries\Security\Helpers;
10
11
12
class Security
13
{
14
15
16
    /**
17
     * Random Hash for protecting URLs
18
     *
19
     * @var string
20
     * @access protected
21
     */
22
    protected $_xss_hash = '';
23
24
    /**
25
     * List of never allowed strings
26
     *
27
     * @var array
28
     * @access protected
29
     */
30
    protected $_never_allowed_str = [
31
        'document.cookie' => '[removed]',
32
        'document.write' => '[removed]',
33
        '.parentNode' => '[removed]',
34
        '.innerHTML' => '[removed]',
35
        'window.location' => '[removed]',
36
        '-moz-binding' => '[removed]',
37
        '<!--' => '&lt;!--',
38
        '-->' => '--&gt;',
39
        '<![CDATA[' => '&lt;![CDATA[',
40
        '<comment>' => '&lt;comment&gt;'
41
    ];
42
43
    /* never allowed, regex replacement */
44
    /**
45
     * List of never allowed regex replacement
46
     *
47
     * @var array
48
     * @access protected
49
     */
50
    protected $_never_allowed_regex = [
51
        'javascript\s*:',
52
        'expression\s*(\(|&\#40;)', // CSS and IE
53
        'vbscript\s*:', // IE, surprise!
54
        'Redirect\s+302',
55
        "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
56
    ];
57
58
59
    /**
     * XSS Clean
     *
     * Sanitizes data so that Cross Site Scripting hacks can be prevented.
     * The input is run through a fixed sequence of passes (each one relies on
     * the normalisation done by the previous ones, so the order matters):
     * invisible characters are stripped, entities are validated and decoded,
     * forbidden strings are removed, PHP tags, dangerous attributes, naughty
     * HTML elements and scripting calls are neutralised.
     *
     * Note: this function should only be used to deal with data upon
     * submission. It is not something that should be used for general
     * runtime processing.
     *
     * @param string|array $str           Value to clean; arrays are cleaned element by element
     * @param bool         $is_image      TRUE when the data is image content: only long "<?php"
     *                                    opening tags are escaped and the value is forwarded to
     *                                    _remove_evil_attributes()
     * @param bool         $evilAttribute Forwarded to _remove_evil_attributes(); TRUE also strips
     *                                    "style" attributes
     * @return string|array The cleaned value, of the same kind as the input
     */
    public function xss_clean($str, $is_image = false, $evilAttribute = true)
    {
        // Arrays are delegated so that every element gets cleaned individually.
        if (is_array($str)) {
            return $this->xssCleanFromArray($str, $is_image, $evilAttribute);
        }

        $str = $this->remove_invisible_characters($str);
        $str = $this->_validate_entities($str);

        // Decode things like href="http://%77%77%77%2E...". rawurldecode() is
        // used (not urldecode()) so that plus signs are left untouched.
        $str = rawurldecode($str);

        $str = $this->decodeEntitiesInTags($str);

        // Decoding may have re-introduced invisible characters.
        $str = $this->remove_invisible_characters($str);

        // Tabs become spaces so that "ja	vascript" cannot hide from the
        // compacting step below. str_replace is used because preg_replace was
        // found to be very slow here on large blocks of data.
        $str = str_replace("\t", ' ', $str);

        $str = $this->_do_never_allowed($str);
        $str = $this->escapePhpTags($str, $is_image);

        // Re-join words that were exploded with whitespace (j a v a s c r i p t).
        $str = $this->compactedWords([
            'javascript',
            'expression',
            'vbscript',
            'script',
            'applet',
            'alert',
            'document',
            'write',
            'cookie',
            'window'
        ], $str);

        $str = $this->disallowedJavascriptInLinks($str);

        // Remove evil attributes such as style, onclick and xmlns
        $str = $this->_remove_evil_attributes($str, $is_image, $evilAttribute);

        $str = $this->escapeNaughtyHtml($str);
        $str = $this->escapeNaughtyCalls($str);

        // Final clean up, in case something got through the filters above.
        return $this->_do_never_allowed($str);
    }

    /**
     * Convert character entities found inside tags to plain characters so
     * that the later string/regex tests work reliably.
     *
     * @param string $str
     * @return string
     */
    private function decodeEntitiesInTags($str)
    {
        $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", [$this, '_convert_attribute'], $str);

        return preg_replace_callback("/<\w+.*?(?=>|<|$)/si", [$this, '_decode_entity'], $str);
    }

    /**
     * Make PHP tags safe by turning their opening bracket into an entity.
     *
     * Note: XML declarations ("<?xml") are inadvertently replaced as well.
     *
     * @param string $str
     * @param bool   $is_image TRUE to escape only the long "<?php" opening tag, because
     *                         image data tends to contain the short open/close sequences
     * @return string
     */
    private function escapePhpTags($str, $is_image)
    {
        if ($is_image === true) {
            return preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
        }

        return str_replace(['<?', '?' . '>'], ['&lt;?', '?&gt;'], $str);
    }

    /**
     * Convert tags whose name is on the naughty list to entities,
     * e.g. <blink> becomes &lt;blink&gt;.
     *
     * @param string $str
     * @return string
     */
    private function escapeNaughtyHtml($str)
    {
        $naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';

        return preg_replace_callback(
            '#<(/*\s*)(' . $naughty . ')([^><]*)([><]*)#is',
            [$this, '_sanitize_naughty_html'],
            $str
        );
    }

    /**
     * Neutralise disallowed PHP / JavaScript calls by converting their
     * parentheses to entities, e.g. eval('x') becomes eval&#40;'x'&#41;.
     *
     * @param string $str
     * @return string
     */
    private function escapeNaughtyCalls($str)
    {
        return preg_replace(
            '#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
            "\\1\\2&#40;\\3&#41;",
            $str
        );
    }
217
218
    /**
     * Compact any exploded words
     *
     * Re-joins listed words whose letters were separated by whitespace
     * (e.g. "j a v a s c r i p t") so that later filters can recognise them.
     *
     * @param array  $words Words to look for
     * @param string $str   Text to process
     * @return string
     */
    protected function compactedWords($words, $str)
    {
        foreach ($words as $word) {
            // The letters of the word, each pair separated by optional whitespace.
            $spacedOut = implode('\s*', str_split($word));

            // Only match when a non-word character follows, so that valid
            // text like "dealer to" does not become "dealerto".
            $str = preg_replace_callback(
                '#(' . $spacedOut . ')(\W)#is',
                [$this, '_compact_exploded_words'],
                $str
            );
        }

        return $str;
    }
241
242
    /**
     * Run xss_clean() on every element of an array, keeping the keys.
     *
     * @param array $str           Values to clean (keys themselves are not cleaned)
     * @param bool  $is_image      Passed through to xss_clean()
     * @param bool  $evilAttribute Passed through to xss_clean()
     * @return array
     */
    protected function xssCleanFromArray($str, $is_image = false, $evilAttribute = true)
    {
        $cleaned = [];

        foreach ($str as $key => $value) {
            $cleaned[$key] = $this->xss_clean($value, $is_image, $evilAttribute);
        }

        return $cleaned;
    }
256
257
    /**
     * Remove disallowed Javascript in links or img tags
     *
     * The cheap preg_match() probes guard the more expensive callbacks, and
     * the whole pass is repeated until the string stops changing so that
     * nested vectors are removed as well.
     *
     * @param string $str
     * @return string
     */
    protected function disallowedJavascriptInLinks($str)
    {
        while (true) {
            $before = $str;

            if (preg_match("/<a/i", $str)) {
                $str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", [$this, '_js_link_removal'], $str);
            }

            if (preg_match("/<img/i", $str)) {
                $str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", [$this, '_js_img_removal'], $str);
            }

            if (preg_match("/script/i", $str) || preg_match("/xss/i", $str)) {
                $str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
            }

            // Nothing changed during this pass: we are done.
            if ($before == $str) {
                break;
            }
        }

        return $str;
    }
285
286
    // --------------------------------------------------------------------
287
288
    /**
     * Remove Evil HTML Attributes (like event handlers and style)
     *
     * It removes the evil attribute and either:
     *  - everything up until a space, e.g. the part between the pipes in
     *    <a |style=document.write('hello');alert('world');| class=link>
     *  - everything inside the quotes, e.g. the part between the pipes in
     *    <a |style="document.write('hello'); alert('world');"| class="link">
     *
     * @param string $str           The string to check
     * @param bool   $is_image      TRUE if this is an image; "xmlns" is then allowed because
     *                              Adobe Photoshop puts namespaced XML metadata into JFIF images
     * @param bool   $evilAttribute TRUE to treat "style" as an evil attribute too
     * @return string The string with the evil attributes removed
     */
    protected function _remove_evil_attributes($str, $is_image, $evilAttribute)
    {
        // Javascript event handlers (onload, onclick, ...) are always removed.
        $attributes = ['on\w*'];

        if ($evilAttribute) {
            $attributes[] = 'style';
        }

        if ($is_image !== true) {
            $attributes[] = 'xmlns';
        }

        $pattern = "#<(/?[^><]+?)([^A-Za-z\-])(" . implode('|', $attributes)
            . ")(\s*=\s*)([\"][^>]*?[\"]|[\'][^>]*?[\']|[^>]*?)([\s><])([><]*)#i";

        // Each pass removes at most one attribute per tag, so repeat until
        // nothing is replaced any more.
        do {
            $str = preg_replace($pattern, '<$1$6', $str, -1, $count);
        } while ($count);

        return $str;
    }
331
332
333
334
    // --------------------------------------------------------------------
335
336
    /**
     * HTML Entities Decode
     *
     * This function is a replacement for html_entity_decode()
     *
     * html_entity_decode() does not convert numeric entities that lack the
     * trailing semicolon, although most browsers still interpret them. After
     * the regular decoding, the remaining "&#xHH" and "&#DD" sequences are
     * therefore converted by hand.
     *
     * @param string $str     Text to decode
     * @param string $charset Character set handed to html_entity_decode()
     * @return string
     */
    public function entity_decode($str, $charset = 'UTF-8')
    {
        // Nothing to decode without an ampersand.
        if (strpos($str, '&') === false) {
            return $str;
        }

        $str = html_entity_decode($str, ENT_COMPAT, $charset);

        // Hexadecimal entities without semicolon. hexdec() may return a float
        // and chr() expects an int in the 0-255 range, so the value is cast
        // and wrapped explicitly (same result as chr()'s implicit wrapping).
        $str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', function ($matches) {
            return chr(((int) hexdec($matches[1])) % 256);
        }, $str);

        // Decimal entities without semicolon; the match is a numeric string.
        return preg_replace_callback('~&#([0-9]{2,4})~', function ($matches) {
            return chr(((int) $matches[1]) % 256);
        }, $str);
    }
365
366
    // --------------------------------------------------------------------
367
368
    /**
     * Filename Security
     *
     * Strips character sequences that are unsafe in a file name. The removal
     * is repeated until the name no longer changes, because removing one
     * sequence can assemble a new one (e.g. "..././" would otherwise collapse
     * to "../").
     *
     * @param string $str           File name to sanitize
     * @param bool   $relative_path TRUE to keep "./" and "/" so a relative path survives
     * @return string
     */
    public function sanitize_filename($str, $relative_path = false)
    {
        $bad = [
            "../",
            "<!--",
            "-->",
            "<",
            ">",
            "'",
            '"',
            '&',
            '$',
            '#',
            '{',
            '}',
            '[',
            ']',
            '=',
            ';',
            '?',
            "%20",
            "%22",
            "%3c", // <
            "%253c", // <
            "%3e", // >
            "%0e", // shift out (control character)
            "%28", // (
            "%29", // )
            "%2528", // (
            "%26", // &
            "%24", // $
            "%3f", // ?
            "%3b", // ;
            "%3d"        // =
        ];

        if (!$relative_path) {
            $bad[] = './';
            $bad[] = '/';
        }

        $str = $this->remove_invisible_characters($str, false);

        // A single pass is not enough: keep stripping until the name is stable.
        do {
            $previous = $str;
            $str = str_replace($bad, '', $str);
        } while ($previous !== $str);

        return stripslashes($str);
    }
420
421
    // ----------------------------------------------------------------
422
423
    /**
     * Compact Exploded Words
     *
     * Callback function for xss_clean() to remove whitespace from
     * things like j a v a s c r i p t
     *
     * @param array $matches Regex match: [1] is the spaced-out word, [2] the character after it
     * @return string The word without whitespace, followed by the trailing character
     */
    protected function _compact_exploded_words($matches)
    {
        $spacedWord = $matches[1];
        $trailingChar = $matches[2];

        return preg_replace('/\s+/s', '', $spacedWord) . $trailingChar;
    }
436
437
    // --------------------------------------------------------------------
438
439
    /**
     * Sanitize Naughty HTML
     *
     * Callback function for xss_clean() to remove naughty HTML elements
     *
     * @param array $matches Regex match: [1] leading slashes/whitespace, [2] tag name,
     *                       [3] rest of the tag, [4] trailing angle brackets
     * @return string The tag with its angle brackets converted to entities
     */
    protected function _sanitize_naughty_html($matches)
    {
        // Encode the captured opening or closing braces as well, to prevent
        // recursive vectors.
        $encodedBraces = strtr($matches[4], ['>' => '&gt;', '<' => '&lt;']);

        // The opening brace of the tag itself is always encoded.
        return '&lt;' . $matches[1] . $matches[2] . $matches[3] . $encodedBraces;
    }
458
459
    // --------------------------------------------------------------------
460
461
    /**
     * JS Link Removal
     *
     * Callback function for xss_clean() to sanitize links.
     * Working on the attributes of one tag at a time limits the PCRE
     * backtracks, which is friendlier for performance and avoids
     * PREG_BACKTRACK_LIMIT_ERROR on link-heavy strings.
     *
     * @param array $match Regex match: [0] the whole tag, [1] its attribute string
     * @return string The tag with dangerous href content removed
     */
    protected function _js_link_removal($match)
    {
        // Normalise the attributes first (brackets dropped, comments removed).
        $attributes = $this->_filter_attributes(str_replace(['<', '>'], '', $match[1]));

        $safeAttributes = preg_replace(
            '#href=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
            '',
            $attributes
        );

        return str_replace($match[1], $safeAttributes, $match[0]);
    }
484
485
    // --------------------------------------------------------------------
486
487
    /**
     * JS Image Removal
     *
     * Callback function for xss_clean() to sanitize image tags.
     * Working on the attributes of one tag at a time limits the PCRE
     * backtracks, which is friendlier for performance and avoids
     * PREG_BACKTRACK_LIMIT_ERROR on image-tag-heavy strings.
     *
     * @param array $match Regex match: [0] the whole tag, [1] its attribute string
     * @return string The tag with dangerous src content removed
     */
    protected function _js_img_removal($match)
    {
        // Normalise the attributes first (brackets dropped, comments removed).
        $attributes = $this->_filter_attributes(str_replace(['<', '>'], '', $match[1]));

        $safeAttributes = preg_replace(
            '#src=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
            '',
            $attributes
        );

        return str_replace($match[1], $safeAttributes, $match[0]);
    }
510
511
    // --------------------------------------------------------------------
512
513
    /**
     * Attribute Conversion
     *
     * Used as a callback for XSS Clean: encodes angle brackets and doubles
     * backslashes inside a matched attribute.
     *
     * @param array $match Regex match; only [0] (the whole attribute) is used
     * @return string
     */
    protected function _convert_attribute($match)
    {
        $replacements = [
            '>' => '&gt;',
            '<' => '&lt;',
            '\\' => '\\\\',
        ];

        return strtr($match[0], $replacements);
    }
525
526
    // --------------------------------------------------------------------
527
528
    /**
     * Filter Attributes
     *
     * Filters tag attributes for consistency and safety: only quoted
     * name="value" pairs are kept, and C-style comments inside them are
     * removed.
     *
     * @param string $str Attribute string of a tag
     * @return string The kept attributes, concatenated in their original order
     */
    protected function _filter_attributes($str)
    {
        if (!preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found)) {
            return '';
        }

        $withoutComments = array_map(function ($attribute) {
            return preg_replace("#/\*.*?\*/#s", '', $attribute);
        }, $found[0]);

        return implode('', $withoutComments);
    }
548
549
    // --------------------------------------------------------------------
550
551
    /**
     * HTML Entity Decode Callback
     *
     * Used as a callback for XSS Clean: decodes the entities of the whole
     * matched text through entity_decode().
     *
     * @param array $match Regex match; only [0] is used
     * @return string
     */
    protected function _decode_entity($match)
    {
        $matchedText = $match[0];

        return $this->entity_decode($matchedText);
    }
563
564
    // --------------------------------------------------------------------
565
566
    /**
     * Validate URL entities
     *
     * Called by xss_clean(). Makes sure character entities end with a
     * semicolon so they can be converted later, while leaving the "&" that
     * separates GET variables in URLs untouched.
     *
     * @param string $str
     * @return string
     */
    protected function _validate_entities($str)
    {
        // Protect GET variables in URLs: "&name=value" temporarily gets its
        // ampersand swapped for a random placeholder so that the entity
        // rules below do not mistake it for an entity.
        $placeholder = $this->xss_hash();
        $str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $placeholder . "\\1=\\2", $str);

        // Validate standard character entities: add a semicolon if missing,
        // to enable the conversion of entities to ASCII later.
        $str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

        // Validate UTF16 two byte encoding (x00): just as above, adds a
        // semicolon if missing.
        $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i', "\\1\\2;", $str);

        // Put the protected ampersands back; without this step the
        // placeholder would stay in the output in place of every "&".
        return str_replace($placeholder, '&', $str);
    }
604
605
    // ----------------------------------------------------------------------
606
607
    /**
     * Do Never Allowed
     *
     * A utility function for xss_clean(): applies the literal replacements
     * of $_never_allowed_str, then replaces every match of the
     * $_never_allowed_regex patterns with "[removed]".
     *
     * @param string $str
     * @return string
     */
    protected function _do_never_allowed($str)
    {
        $str = str_replace(
            array_keys($this->_never_allowed_str),
            array_values($this->_never_allowed_str),
            $str
        );

        // Apply the patterns one after the other, in declaration order.
        return array_reduce($this->_never_allowed_regex, function ($carry, $regex) {
            return preg_replace('#' . $regex . '#is', '[removed]', $carry);
        }, $str);
    }
625
626
627 16
    /**
     * Remove Invisible Characters
     *
     * Strips every control character except newline (dec 10), carriage
     * return (dec 13) and horizontal tab (dec 09). The removal is repeated
     * until nothing is left to strip, so sequences that only appear once an
     * inner one has been removed are caught too.
     *
     * @param string $str         Text to clean
     * @param bool   $url_encoded TRUE to also strip the URL-encoded form (%00 ... %1F)
     * @return string
     */
    protected function remove_invisible_characters($str, $url_encoded = true)
    {
        $non_displayables = [];

        if ($url_encoded) {
            // Hex digits in percent-encoding may be upper or lower case,
            // hence the case-insensitive match.
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        do {
            $str = preg_replace($non_displayables, '', $str, -1, $count);
        } while ($count);

        return $str;
    }
647
648
    /**
     * Random Hash for protecting URLs
     *
     * The hash is generated on first use and then reused for the lifetime
     * of the object.
     *
     * @return string
     */
    public function xss_hash()
    {
        // Already generated: reuse it.
        if ($this->_xss_hash != '') {
            return $this->_xss_hash;
        }

        mt_srand();
        $seed = time() + mt_rand(0, 1999999999);
        $this->_xss_hash = md5($seed);

        return $this->_xss_hash;
    }
662
663
664 2
    /**
     * Escape special characters of a string with a backslash.
     *
     * NOTE(review): judging by the name this is meant for values placed in
     * an SQL LIKE clause - confirm against the callers.
     *
     * The replacement is done in a single pass, so the backslashes inserted
     * for one character are never escaped a second time, and "}" (not "]")
     * is the character mapped to "\}".
     *
     * @param string|array $str    Value to escape; arrays are escaped element by element
     * @param string       $escape Replacement used for a single quote
     * @return string|array
     */
    public static function escapeLike($str, $escape = '\'\'')
    {
        if (is_array($str)) {
            return array_map(function ($item) use ($escape) {
                return self::escapeLike($item, $escape);
            }, $str);
        }

        return strtr((string) $str, [
            '%' => '\%',
            '_' => '\_',
            '\'' => $escape,
            '"' => '\"',
            '<' => '\<',
            '>' => '\>',
            '(' => '\(',
            ')' => '\)',
            '{' => '\{',
            '}' => '\}',
            ':' => '\:',
            '/' => '\/',
            '\\' => '\\\\',
        ]);
    }
671
}