Completed
Push — master ( f5a958...6ea638 )
by Maxime
03:34
created

Security::xss_clean()   C

Complexity

Conditions 12
Paths 98

Size

Total Lines 180
Code Lines 52

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 36
CRAP Score 12.2614

Importance

Changes 0
Metric Value
cc 12
eloc 52
nc 98
nop 3
dl 0
loc 180
ccs 36
cts 41
cp 0.878
crap 12.2614
rs 6.9666
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

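Commonly applied refactorings include Extract Method: move a cohesive, commented part of the long method's body into a new, well-named method.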

1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * User: mfrancois
5
 * Date: 04/07/2016
6
 * Time: 11:32
7
 */
8
9
namespace Distilleries\Security\Helpers;
10
11
12
class Security
{
    /**
     * Random hash used as a temporary stand-in for "&" while URL
     * query strings are being validated (see _validate_entities()).
     *
     * @var string
     */
    protected $_xss_hash = '';

    /**
     * Literal strings that are never allowed, mapped to their replacement.
     *
     * @var array
     */
    protected $_never_allowed_str = [
        'document.cookie' => '[removed]',
        'document.write' => '[removed]',
        '.parentNode' => '[removed]',
        '.innerHTML' => '[removed]',
        'window.location' => '[removed]',
        '-moz-binding' => '[removed]',
        '<!--' => '&lt;!--',
        '-->' => '--&gt;',
        '<![CDATA[' => '&lt;![CDATA[',
        '<comment>' => '&lt;comment&gt;'
    ];

    /**
     * Regex fragments that are never allowed. Each one is wrapped in
     * "#...#is" by _do_never_allowed() and replaced with "[removed]".
     *
     * @var array
     */
    protected $_never_allowed_regex = [
        'javascript\s*:',
        'expression\s*(\(|&\#40;)', // CSS and IE
        'vbscript\s*:', // IE, surprise!
        'Redirect\s+302',
        "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
    ];

    /**
     * XSS Clean
     *
     * Sanitizes data so that Cross Site Scripting hacks can be prevented.
     * Arrays are cleaned element by element (recursively) and returned
     * as arrays with the same keys.
     *
     * Note: this function should only be used to deal with data upon
     * submission. It is not something that should be used for general
     * runtime processing.
     *
     * @param string|array $str           Input to clean.
     * @param bool         $is_image      TRUE when the data is image content: only long
     *                                    PHP opening tags are neutralised and the xmlns
     *                                    attribute is left alone.
     * @param bool         $evilAttribute TRUE to strip "style" attributes in addition to
     *                                    event handlers and xmlns.
     * @return string|array Cleaned string, or array of cleaned values for array input.
     */
    public function xss_clean($str, $is_image = false, $evilAttribute = true)
    {
        // Arrays are handled by cleaning every element with the same options.
        if (is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = $this->xss_clean($value, $is_image, $evilAttribute);
            }

            return $str;
        }

        // Remove invisible characters
        $str = $this->remove_invisible_characters($str);

        // Validate entities in URLs
        $str = $this->_validate_entities($str);

        /*
         * URL decode
         *
         * Just in case stuff like this is submitted:
         *
         * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
         *
         * Note: rawurldecode() is used so plus signs are not removed.
         */
        $str = rawurldecode($str);

        /*
         * Convert character entities to ASCII
         *
         * This permits the tests below to work reliably. Only entities
         * within tags are converted since these are the ones that will
         * pose security problems.
         */
        $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", [$this, '_convert_attribute'], $str);
        $str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", [$this, '_decode_entity'], $str);

        // Decoding may have produced new invisible characters: remove them again.
        $str = $this->remove_invisible_characters($str);

        /*
         * Convert all tabs to spaces
         *
         * This prevents strings like "ja<TAB>vascript". Spaces between
         * characters are dealt with later. str_replace is used because
         * preg_replace was found to be very slow here on large inputs.
         */
        if (strpos($str, "\t") !== false) {
            $str = str_replace("\t", ' ', $str);
        }

        // Remove strings that are never allowed
        $str = $this->_do_never_allowed($str);

        /*
         * Make PHP tags safe
         *
         * Note: XML declarations are inadvertently replaced too, but
         * it doesn't seem to pose a problem.
         */
        if ($is_image === true) {
            // Images tend to contain the PHP short opening and closing
            // tags every so often, so only the long opening tag is handled.
            $str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
        } else {
            $str = str_replace(['<?', '?' . '>'], ['&lt;?', '?&gt;'], $str);
        }

        /*
         * Compact any exploded words
         *
         * This corrects words like:  j a v a s c r i p t
         * These words are compacted back to their correct state.
         */
        $words = [
            'javascript',
            'expression',
            'vbscript',
            'script',
            'applet',
            'alert',
            'document',
            'write',
            'cookie',
            'window'
        ];

        foreach ($words as $word) {
            // "script" becomes "s\s*c\s*r\s*i\s*p\s*t": optional whitespace between letters.
            $spaced = implode('\s*', str_split($word));

            // Only do this when the word is followed by a non-word character.
            // That way valid stuff like "dealer to" does not become "dealerto".
            $str = preg_replace_callback(
                '#(' . $spaced . ')(\W)#is',
                [$this, '_compact_exploded_words'],
                $str
            );
        }

        /*
         * Remove disallowed Javascript in links or img tags.
         * Repeat until a pass leaves the string unchanged, so that removing
         * one vector cannot assemble a new one.
         */
        do {
            $original = $str;

            if (preg_match("/<a/i", $str)) {
                $str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", [$this, '_js_link_removal'], $str);
            }

            if (preg_match("/<img/i", $str)) {
                $str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", [$this, '_js_img_removal'], $str);
            }

            if (preg_match("/script/i", $str) || preg_match("/xss/i", $str)) {
                $str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
            }
            // Strict comparison: loose "!=" treats numeric-looking strings
            // such as "1e3" and "1000" as equal and could stop too early.
        } while ($original !== $str);

        unset($original);

        // Remove evil attributes such as style, onclick and xmlns
        $str = $this->_remove_evil_attributes($str, $is_image, $evilAttribute);

        /*
         * Sanitize naughty HTML elements
         *
         * If a tag containing any of the words in the list below is
         * found, the tag gets converted to entities.
         *
         * So this: <blink>
         * Becomes: &lt;blink&gt;
         */
        $naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
        $str = preg_replace_callback(
            '#<(/*\s*)(' . $naughty . ')([^><]*)([><]*)#is',
            [$this, '_sanitize_naughty_html'],
            $str
        );

        /*
         * Sanitize naughty scripting elements
         *
         * Similar to above, only instead of looking for tags it looks
         * for PHP and JavaScript commands that are disallowed. Rather
         * than removing the code, it converts the parentheses to
         * entities, rendering the code un-executable.
         *
         * For example: eval('some code')
         * Becomes:     eval&#40;'some code'&#41;
         */
        $str = preg_replace(
            '#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
            "\\1\\2&#40;\\3&#41;",
            $str
        );

        // Final clean up: extra precaution in case something got
        // through the above filters.
        $str = $this->_do_never_allowed($str);

        return $str;
    }

    // --------------------------------------------------------------------

    /**
     * Remove Evil HTML Attributes (like event handlers and style)
     *
     * It removes the evil attribute and either:
     *  - Everything up until a space.
     *    For example, everything between the pipes:
     *    <a |style=document.write('hello');alert('world');| class=link>
     *  - Everything inside the quotes.
     *    For example, everything between the pipes:
     *    <a |style="document.write('hello'); alert('world');"| class="link">
     *
     * @param string $str           The string to check.
     * @param bool   $is_image      TRUE if this is an image (xmlns is then allowed).
     * @param bool   $evilAttribute TRUE to also remove "style" attributes.
     * @return string The string with the evil attributes removed.
     */
    protected function _remove_evil_attributes($str, $is_image, $evilAttribute)
    {
        // All javascript event handlers (e.g. onload, onclick, onmouseover),
        // xmlns and, when requested, style.
        if ($evilAttribute) {
            $evil_attributes = ['on\w*', 'style', 'xmlns'];
        } else {
            $evil_attributes = ['on\w*', 'xmlns'];
        }

        if ($is_image === true) {
            /*
             * Adobe Photoshop puts XML metadata into JFIF images,
             * including namespacing, so we have to allow this for images.
             */
            unset($evil_attributes[array_search('xmlns', $evil_attributes, true)]);
        }

        // Each pass removes at most one attribute per tag, so repeat
        // until a pass performs no replacement.
        do {
            $str = preg_replace(
                "#<(/?[^><]+?)([^A-Za-z\-])(" . implode('|', $evil_attributes)
                . ")(\s*=\s*)([\"][^>]*?[\"]|[\'][^>]*?[\']|[^>]*?)([\s><])([><]*)#i",
                "<$1$6",
                $str,
                -1,
                $count
            );
        } while ($count);

        return $str;
    }

    // --------------------------------------------------------------------

    /**
     * HTML Entities Decode
     *
     * This function is a replacement for html_entity_decode().
     *
     * html_entity_decode() is not used by itself because, while it is not
     * technically correct to leave out the semicolon at the end of an
     * entity, most browsers will still interpret the entity correctly.
     * html_entity_decode() does not convert entities without semicolons,
     * so numeric entities lacking one are converted here with chr().
     *
     * @param string $str     Text to decode.
     * @param string $charset Character set passed to html_entity_decode().
     * @return string
     */
    public function entity_decode($str, $charset = 'UTF-8')
    {
        if (stristr($str, '&') === false) {
            return $str;
        }

        $str = html_entity_decode($str, ENT_COMPAT, $charset);

        // Hexadecimal entities without a trailing semicolon.
        $str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', function ($matches) {
            // hexdec() may return a float; chr() expects an integer.
            return chr((int) hexdec($matches[1]));
        }, $str);

        // Decimal entities without a trailing semicolon.
        return preg_replace_callback('~&#([0-9]{2,4})~', function ($matches) {
            return chr((int) $matches[1]);
        }, $str);
    }

    // --------------------------------------------------------------------

    /**
     * Filename Security
     *
     * Strips path traversal sequences and characters that are unsafe in
     * file names, then removes backslash escapes with stripslashes().
     *
     * @param string $str           File name to sanitize.
     * @param bool   $relative_path TRUE to keep "./" and "/" so relative paths survive.
     * @return string
     */
    public function sanitize_filename($str, $relative_path = false)
    {
        $bad = [
            "../",
            "<!--",
            "-->",
            "<",
            ">",
            "'",
            '"',
            '&',
            '$',
            '#',
            '{',
            '}',
            '[',
            ']',
            '=',
            ';',
            '?',
            "%20",
            "%22",
            "%3c", // <
            "%253c", // < (double encoded)
            "%3e", // >
            "%0e", // shift-out control character (0x0E)
            "%28", // (
            "%29", // )
            "%2528", // ( (double encoded)
            "%26", // &
            "%24", // $
            "%3f", // ?
            "%3b", // ;
            "%3d"        // =
        ];

        if (!$relative_path) {
            $bad[] = './';
            $bad[] = '/';
        }

        $str = $this->remove_invisible_characters($str, false);

        return stripslashes(str_replace($bad, '', $str));
    }

    // ----------------------------------------------------------------

    /**
     * Compact Exploded Words
     *
     * Callback function for xss_clean() to remove whitespace from
     * things like j a v a s c r i p t
     *
     * @param array $matches [1] is the exploded word, [2] the non-word character after it.
     * @return string The word without whitespace, followed by the unchanged trailing character.
     */
    protected function _compact_exploded_words($matches)
    {
        return preg_replace('/\s+/s', '', $matches[1]) . $matches[2];
    }

    // --------------------------------------------------------------------

    /**
     * Sanitize Naughty HTML
     *
     * Callback function for xss_clean() to neutralise naughty HTML elements
     *
     * @param array $matches Regex groups: slash/whitespace prefix, tag name,
     *                       tag content, trailing brackets.
     * @return string
     */
    protected function _sanitize_naughty_html($matches)
    {
        // encode opening brace
        $str = '&lt;' . $matches[1] . $matches[2] . $matches[3];

        // encode captured opening or closing brace to prevent recursive vectors
        $str .= str_replace(['>', '<'], ['&gt;', '&lt;'], $matches[4]);

        return $str;
    }

    // --------------------------------------------------------------------

    /**
     * JS Link Removal
     *
     * Callback function for xss_clean() to sanitize links.
     * This limits the PCRE backtracks, making it more performance friendly
     * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
     * PHP 5.2+ on link-heavy strings.
     *
     * @param array $match [0] is the whole <a ...> tag, [1] its attribute string.
     * @return string
     */
    protected function _js_link_removal($match)
    {
        $attributes = $this->_filter_attributes(str_replace(['<', '>'], '', $match[1]));

        return str_replace(
            $match[1],
            preg_replace(
                '#href=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
                '',
                $attributes
            ),
            $match[0]
        );
    }

    // --------------------------------------------------------------------

    /**
     * JS Image Removal
     *
     * Callback function for xss_clean() to sanitize image tags.
     * This limits the PCRE backtracks, making it more performance friendly
     * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
     * PHP 5.2+ on image tag heavy strings.
     *
     * @param array $match [0] is the whole <img ...> tag, [1] its attribute string.
     * @return string
     */
    protected function _js_img_removal($match)
    {
        $attributes = $this->_filter_attributes(str_replace(['<', '>'], '', $match[1]));

        return str_replace(
            $match[1],
            preg_replace(
                '#src=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
                '',
                $attributes
            ),
            $match[0]
        );
    }

    // --------------------------------------------------------------------

    /**
     * Attribute Conversion
     *
     * Used as a callback for XSS Clean: encodes angle brackets and doubles
     * backslashes inside a quoted attribute.
     *
     * @param array $match [0] is the matched name="value" text.
     * @return string
     */
    protected function _convert_attribute($match)
    {
        return str_replace(['>', '<', '\\'], ['&gt;', '&lt;', '\\\\'], $match[0]);
    }

    // --------------------------------------------------------------------

    /**
     * Filter Attributes
     *
     * Keeps only quoted name="value" attributes and strips block
     * comments from them; everything else is dropped.
     *
     * @param string $str Attribute string of a tag.
     * @return string
     */
    protected function _filter_attributes($str)
    {
        $out = '';

        if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches)) {
            foreach ($matches[0] as $match) {
                $out .= preg_replace("#/\*.*?\*/#s", '', $match);
            }
        }

        return $out;
    }

    // --------------------------------------------------------------------

    /**
     * HTML Entity Decode Callback
     *
     * Used as a callback for XSS Clean
     *
     * @param array $match [0] is the matched tag text.
     * @return string
     */
    protected function _decode_entity($match)
    {
        return $this->entity_decode($match[0]);
    }

    // --------------------------------------------------------------------

    /**
     * Validate URL entities
     *
     * Called by xss_clean()
     *
     * @param string $str
     * @return string
     */
    protected function _validate_entities($str)
    {
        /*
         * Protect GET variables in URLs: the "&" of "&name=value" is
         * swapped for a random hash so the entity rules below do not
         * mistake it for the start of an entity.
         */
        $str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash() . "\\1=\\2", $str);

        /*
         * Validate standard character entities
         *
         * Add a semicolon if missing. We do this to enable
         * the conversion of entities to ASCII later.
         */
        $str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

        /*
         * Validate UTF16 two byte encoding (x00)
         *
         * Just as above, adds a semicolon if missing.
         */
        $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i', "\\1\\2;", $str);

        /*
         * Un-protect GET variables in URLs: put the "&" back. Without this
         * step the random placeholder hash would leak into the output.
         */
        $str = str_replace($this->xss_hash(), '&', $str);

        return $str;
    }

    // ----------------------------------------------------------------------

    /**
     * Do Never Allowed
     *
     * A utility function for xss_clean(): applies the never-allowed
     * literal replacements, then the never-allowed regex patterns.
     *
     * @param string $str
     * @return string
     */
    protected function _do_never_allowed($str)
    {
        $str = str_replace(array_keys($this->_never_allowed_str), $this->_never_allowed_str, $str);

        foreach ($this->_never_allowed_regex as $regex) {
            $str = preg_replace('#' . $regex . '#is', '[removed]', $str);
        }

        return $str;
    }

    /**
     * Remove Invisible Characters
     *
     * Removes every control character except newline (dec 10), carriage
     * return (dec 13) and horizontal tab (dec 09). Repeats until nothing
     * is left to remove, so removal cannot assemble a new sequence.
     *
     * @param string $str
     * @param bool   $url_encoded TRUE to also remove the URL-encoded forms (%00-%1f).
     * @return string
     */
    protected function remove_invisible_characters($str, $url_encoded = true)
    {
        $non_displayables = [];

        if ($url_encoded) {
            $non_displayables[] = '/%0[0-8bcef]/'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/'; // url encoded 16-31
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        do {
            $str = preg_replace($non_displayables, '', $str, -1, $count);
        } while ($count);

        return $str;
    }

    /**
     * Random Hash for protecting URLs
     *
     * Generated on first use and then reused for the lifetime of the object.
     *
     * @return string 32-character md5 hash.
     */
    public function xss_hash()
    {
        if ($this->_xss_hash === '') {
            mt_srand();
            $this->_xss_hash = md5((string) (time() + mt_rand(0, 1999999999)));
        }

        return $this->_xss_hash;
    }

    /**
     * Escape a string for use inside a LIKE pattern.
     *
     * Each listed special character is prefixed with a backslash and a
     * single quote is replaced by $escape. str_replace() applies the pairs
     * in order and the backslash pair comes last, so every backslash in the
     * result (including the ones just inserted) ends up doubled.
     *
     * @param string $str    Raw value.
     * @param string $escape Replacement for a single quote (two single quotes by default).
     * @return string
     */
    public static function escapeLike($str, $escape = '\'\'')
    {
        return str_replace(
            ['%', '_', '\'', '"', '<', '>', '(', ')', '{', '}', ':', '/', '\\'],
            ['\%', '\_', $escape, '\"', '\<', '\>', '\(', '\)', '\{', '\}', '\:', '\/', '\\\\'],
            $str
        );
    }
}