Security::xss_clean()   A
last analyzed

Complexity

Conditions 4
Paths 5

Size

Total Lines 132
Code Lines 36

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 26
CRAP Score 4.0008

Importance

Changes 5
Bugs 0 Features 0
Metric Value
cc 4
eloc 36
c 5
b 0
f 0
nc 5
nop 3
dl 0
loc 132
ccs 26
cts 27
cp 0.963
crap 4.0008
rs 9.344

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Created by PhpStorm.
4
 * User: mfrancois
5
 * Date: 04/07/2016
6
 * Time: 11:32
7
 */
8
9
namespace Distilleries\Security\Helpers;
10
11
12
class Security
13
{
14
15
16
    /**
17
     * Random Hash for protecting URLs
18
     *
19
     * @var string
20
     * @access protected
21
     */
22
    protected $_xss_hash = '';
23
24
    /**
25
     * List of never allowed strings
26
     *
27
     * @var array
28
     * @access protected
29
     */
30
    protected $_never_allowed_str = [
31
        'document.cookie' => '[removed]',
32
        'document.write' => '[removed]',
33
        '.parentNode' => '[removed]',
34
        '.innerHTML' => '[removed]',
35
        'window.location' => '[removed]',
36
        '-moz-binding' => '[removed]',
37
        '<!--' => '&lt;!--',
38
        '-->' => '--&gt;',
39
        '<![CDATA[' => '&lt;![CDATA[',
40
        '<comment>' => '&lt;comment&gt;'
41
    ];
42
43
    /* never allowed, regex replacement */
44
    /**
45
     * List of never allowed regex replacement
46
     *
47
     * @var array
48
     * @access protected
49
     */
50
    protected $_never_allowed_regex = [
51
        'javascript\s*:',
52
        'expression\s*(\(|&\#40;)', // CSS and IE
53
        'vbscript\s*:', // IE, surprise!
54
        'Redirect\s+302',
55
        "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
56
    ];
57
58
59
    /**
60
     * XSS Clean
61
     *
62
     * Sanitizes data so that Cross Site Scripting Hacks can be
63
     * prevented.  This function does a fair amount of work but
64
     * it is extremely thorough, designed to prevent even the
65
     * most obscure XSS attempts.  Nothing is ever 100% foolproof,
66
     * of course, but I haven't been able to get anything passed
67
     * the filter.
68
     *
69
     * Note: This function should only be used to deal with data
70
     * upon submission.  It's not something that should
71
     * be used for general runtime processing.
72
     *
73
     * This function was based in part on some code and ideas I
74
     * got from Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
75
     *
76
     * To help develop this script I used this great list of
77
     * vulnerabilities along with a few other hacks I've
78
     * harvested from examining vulnerabilities in other programs:
79
     * http://ha.ckers.org/xss.html
80
     *
81
     * @param    mixed    string or array or boolean
82
     * @return    string | array
83
     */
84 16
    public function xss_clean($str, $is_image = false, $evilAttribute = true)
85
    {
86
        /*
87
           * Is the string an array?
88
           *
89
           */
90 16
        if (is_array($str)) {
91 2
            return $this->xssCleanFromArray($str, $is_image, $evilAttribute);
92
        }
93
94
        //Remove Invisible Characters
95 16
        $str = $this->remove_invisible_characters($str);
96
97
        // Validate Entities in URLs
98 16
        $str = $this->_validate_entities($str);
99
100
        /**
101
         * URL Decode
102
         *
103
         * Just in case stuff like this is submitted:
104
         *
105
         * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
106
         *
107
         * Note: Use rawurldecode() so it does not remove plus signs
108
         *
109
         **/
110 16
        $str = rawurldecode($str);
111
112
        /**
113
         * Convert character entities to ASCII
114
         *
115
         * This permits our tests below to work reliably.
116
         * We only convert entities that are within tags since
117
         * these are the ones that will pose security problems.
118
         *
119
         **/
120
121 16
        $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", [$this, '_convert_attribute'], $str);
122 16
        $str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", [$this, '_decode_entity'], $str);
123
124
        /**
125
         * Remove Invisible Characters Again!
126
         **/
127 16
        $str = $this->remove_invisible_characters($str);
128
129
        /**
130
         * Convert all tabs to spaces
131
         *
132
         * This prevents strings like this: ja    vascript
133
         * NOTE: we deal with spaces between characters later.
134
         * NOTE: preg_replace was found to be amazingly slow here on
135
         * large blocks of data, so we use str_replace.
136
         **/
137 16
        if (strpos($str, "\t") !== false) {
138 2
            $str = str_replace("\t", ' ', $str);
139
        }
140
141
        // Remove Strings that are never allowed
142 16
        $str = $this->_do_never_allowed($str);
143
144
        /*
145
           * Makes PHP tags safe
146
           *
147
           * Note: XML tags are inadvertently replaced too:
148
           *
149
           * <?xml
150
           *
151
           * But it doesn't seem to pose a problem.
152
           */
153 16
        if ($is_image === true) {
154
            // Images have a tendency to have the PHP short opening and
155
            // closing tags every so often so we skip those and only
156
            // do the long opening tags.
157
            $str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
158
        } else {
159 16
            $str = str_replace(['<?', '?'.'>'], ['&lt;?', '?&gt;'], $str);
160
        }
161
162 16
        $str = $this->compactedWords([
163 16
            'javascript',
164
            'expression',
165
            'vbscript',
166
            'script',
167
            'applet',
168
            'alert',
169
            'document',
170
            'write',
171
            'cookie',
172
            'window'
173 8
        ], $str);
174
175
176 16
        $str = $this->disallowedJavascriptInLinks($str);
177
178
        // Remove evil attributes such as style, onclick and xmlns
179 16
        $str = $this->_remove_evil_attributes($str, $is_image, $evilAttribute);
180
181
        /*
182
           * Sanitize naughty HTML elements
183
           *
184
           * If a tag containing any of the words in the list
185
           * below is found, the tag gets converted to entities.
186
           *
187
           * So this: <blink>
188
           * Becomes: &lt;blink&gt;
189
           */
190 16
        $naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
191 16
        $str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', [$this, '_sanitize_naughty_html'],
192 8
            $str);
193
194
        /*
195
           * Sanitize naughty scripting elements
196
           *
197
           * Similar to above, only instead of looking for
198
           * tags it looks for PHP and JavaScript commands
199
           * that are disallowed.  Rather than removing the
200
           * code, it simply converts the parenthesis to entities
201
           * rendering the code un-executable.
202
           *
203
           * For example:	eval('some code')
204
           * Becomes:		eval&#40;'some code'&#41;
205
           */
206 16
        $str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
207 16
            "\\1\\2&#40;\\3&#41;", $str);
208
209
210
        // Final clean up
211
        // This adds a bit of extra precaution in case
212
        // something got through the above filters
213 16
        $str = $this->_do_never_allowed($str);
214
215 16
        return $str;
216
    }
217
218
    /**
219
     * Compact any exploded words
220
     *
221
     * This corrects words like:  j a v a s c r i p t
222
     * These words are compacted back to their correct state.
223
     **/
224 16
    protected function compactedWords($words, $str)
225
    {
226 16
        foreach ($words as $word) {
227 16
            $temp = '';
228
229 16
            for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++) {
230 16
                $temp .= substr($word, $i, 1)."\s*";
231
            }
232
233
            // We only want to do this when it is followed by a non-word character
234
            // That way valid stuff like "dealer to" does not become "dealerto"
235 16
            $str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', [$this, '_compact_exploded_words'],
236 8
                $str);
237
        }
238
239 16
        return $str;
240
    }
241
242
    /**
243
     * @param $str
244
     * @param bool $is_image
245
     * @param bool $evilAttribute
246
     * @return array
247
     */
248 2
    protected function xssCleanFromArray($str, $is_image = false, $evilAttribute = true)
249
    {
250 2
        foreach ($str as $key => $value) {
251 2
            $str[$key] = $this->xss_clean($str[$key], $is_image, $evilAttribute);
252
        }
253
254 2
        return $str;
255
    }
256
257
    /*
258
     * Remove disallowed Javascript in links or img tags
259
     * We used to do some version comparisons and use of stripos for PHP5,
260
     * but it is dog slow compared to these simplified non-capturing
261
     * preg_match(), especially if the pattern exists in the string
262
     */
263 16
    protected function disallowedJavascriptInLinks($str)
264
    {
265
        do {
266 16
            $original = $str;
267
268 16
            if (preg_match("/<a/i", $str)) {
269 4
                $str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", [$this, '_js_link_removal'], $str);
270
            }
271
272 16
            if (preg_match("/<img/i", $str)) {
273 2
                $str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", [$this, '_js_img_removal'], $str);
274
            }
275
276 16
            if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str)) {
277 2
                $str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
278
            }
279 16
        } while ($original != $str);
280
281 16
        unset($original);
282
283 16
        return $str;
284
    }
285
286
    // --------------------------------------------------------------------
287
288
    /*
289
      * Remove Evil HTML Attributes (like evenhandlers and style)
290
      *
291
      * It removes the evil attribute and either:
292
      * 	- Everything up until a space
293
      *		For example, everything between the pipes:
294
      *		<a |style=document.write('hello');alert('world');| class=link>
295
      * 	- Everything inside the quotes
296
      *		For example, everything between the pipes:
297
      *		<a |style="document.write('hello'); alert('world');"| class="link">
298
      *
299
      * @param string $str The string to check
300
      * @param boolean $is_image TRUE if this is an image
301
      * @return string The string with the evil attributes removed
302
      */
303 16
    protected function _remove_evil_attributes($str, $is_image, $evilAttribute)
304
    {
305
        // All javascript event handlers (e.g. onload, onclick, onmouseover), style, and xmlns
306 16
        if ($evilAttribute) {
307 16
            $evil_attributes = ['on\w*', 'style', 'xmlns'];
308
        } else {
309
            $evil_attributes = ['on\w*', 'xmlns'];
310
        }
311
312 16
        if ($is_image === true) {
313
            /*
314
                * Adobe Photoshop puts XML metadata into JFIF images,
315
                * including namespacing, so we have to allow this for images.
316
                */
317
            unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
318
        }
319
320
        do {
321 16
            $str = preg_replace(
322 16
                "#<(/?[^><]+?)([^A-Za-z\-])(".implode('|',
323 16
                    $evil_attributes).")(\s*=\s*)([\"][^>]*?[\"]|[\'][^>]*?[\']|[^>]*?)([\s><])([><]*)#i",
324 16
                "<$1$6",
325 16
                $str, -1, $count
326
            );
327 16
        } while ($count);
328
329 16
        return $str;
330
    }
331
332
333
334
    // --------------------------------------------------------------------
335
336
    /**
337
     * HTML Entities Decode
338
     *
339
     * This function is a replacement for html_entity_decode()
340
     *
341
     * The reason we are not using html_entity_decode() by itself is because
342
     * while it is not technically correct to leave out the semicolon
343
     * at the end of an entity most browsers will still interpret the entity
344
     * correctly.  html_entity_decode() does not convert entities without
345
     * semicolons, so we are left with our own little solution here. Bummer.
346
     *
347
     * @param    string
348
     * @param    string
349
     * @return    string
350
     */
351 10
    public function entity_decode($str, $charset = 'UTF-8')
352
    {
353 10
        if (stristr($str, '&') === false) {
354 8
            return $str;
355
        }
356
357 2
        $str = html_entity_decode($str, ENT_COMPAT, $charset);
358 1
        $str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', function($matches) {
359
            return chr(intval(hexdec($matches[1])));
360 2
        }, $str);
361 1
        return preg_replace_callback('~&#([0-9]{2,4})~', function($matches) {
362
            return chr($matches[1]);
363 2
        }, $str);
364
    }
365
366
    // --------------------------------------------------------------------
367
368
    /**
369
     * Filename Security
370
     *
371
     * @param    string
372
     * @param    bool
373
     * @return    string
374
     */
375 4
    public function sanitize_filename($str, $relative_path = false)
376
    {
377
        $bad = [
378 4
            "../",
379
            "<!--",
380
            "-->",
381
            "<",
382
            ">",
383
            "'",
384
            '"',
385
            '&',
386
            '$',
387
            '#',
388
            '{',
389
            '}',
390
            '[',
391
            ']',
392
            '=',
393
            ';',
394
            '?',
395
            "%20",
396
            "%22",
397
            "%3c", // <
398
            "%253c", // <
399
            "%3e", // >
400
            "%0e", // >
401
            "%28", // (
402
            "%29", // )
403
            "%2528", // (
404
            "%26", // &
405
            "%24", // $
406
            "%3f", // ?
407
            "%3b", // ;
408
            "%3d"        // =
409
        ];
410
411 4
        if (!$relative_path) {
412 2
            $bad[] = './';
413 2
            $bad[] = '/';
414
        }
415
416 4
        $str = $this->remove_invisible_characters($str, false);
417
418 4
        return stripslashes(str_replace($bad, '', $str));
419
    }
420
421
    // ----------------------------------------------------------------
422
423
    /**
424
     * Compact Exploded Words
425
     *
426
     * Callback function for xss_clean() to remove whitespace from
427
     * things like j a v a s c r i p t
428
     *
429
     * @param    array $matches
430
     * @return    string | string[]
431
     */
432 6
    protected function _compact_exploded_words($matches)
433
    {
434 6
        return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
435
    }
436
437
    // --------------------------------------------------------------------
438
439
    /**
440
     * Sanitize Naughty HTML
441
     *
442
     * Callback function for xss_clean() to remove naughty HTML elements
443
     *
444
     * @param    array
445
     * @return    string
446
     */
447 4
    protected function _sanitize_naughty_html($matches)
448
    {
449
        // encode opening brace
450 4
        $str = '&lt;'.$matches[1].$matches[2].$matches[3];
451
452
        // encode captured opening or closing brace to prevent recursive vectors
453 4
        $str .= str_replace(['>', '<'], ['&gt;', '&lt;'],
454 4
            $matches[4]);
455
456 4
        return $str;
457
    }
458
459
    // --------------------------------------------------------------------
460
461
    /**
462
     * JS Link Removal
463
     *
464
     * Callback function for xss_clean() to sanitize links
465
     * This limits the PCRE backtracks, making it more performance friendly
466
     * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
467
     * PHP 5.2+ on link-heavy strings
468
     *
469
     * @param    array
470
     * @return    string
471
     */
472 2
    protected function _js_link_removal($match)
473
    {
474 2
        return str_replace(
475 2
            $match[1],
476 2
            preg_replace(
477 2
                '#href=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
478 2
                '',
479 2
                $this->_filter_attributes(str_replace(['<', '>'], '', $match[1]))
480
            ),
481 2
            $match[0]
482
        );
483
    }
484
485
    // --------------------------------------------------------------------
486
487
    /**
488
     * JS Image Removal
489
     *
490
     * Callback function for xss_clean() to sanitize image tags
491
     * This limits the PCRE backtracks, making it more performance friendly
492
     * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
493
     * PHP 5.2+ on image tag heavy strings
494
     *
495
     * @param    array
496
     * @return    string
497
     */
498 2
    protected function _js_img_removal($match)
499
    {
500 2
        return str_replace(
501 2
            $match[1],
502 2
            preg_replace(
503 2
                '#src=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
504 2
                '',
505 2
                $this->_filter_attributes(str_replace(['<', '>'], '', $match[1]))
506
            ),
507 2
            $match[0]
508
        );
509
    }
510
511
    // --------------------------------------------------------------------
512
513
    /**
514
     * Attribute Conversion
515
     *
516
     * Used as a callback for XSS Clean
517
     *
518
     * @param    array
519
     * @return    string
520
     */
521 4
    protected function _convert_attribute($match)
522
    {
523 4
        return str_replace(['>', '<', '\\'], ['&gt;', '&lt;', '\\\\'], $match[0]);
524
    }
525
526
    // --------------------------------------------------------------------
527
528
    /**
529
     * Filter Attributes
530
     *
531
     * Filters tag attributes for consistency and safety
532
     *
533
     * @param    string
534
     * @return    string
535
     */
536 2
    protected function _filter_attributes($str)
537
    {
538 2
        $out = '';
539
540 2
        if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches)) {
541 2
            foreach ($matches[0] as $match) {
542 2
                $out .= preg_replace("#/\*.*?\*/#s", '', $match);
543
            }
544
        }
545
546 2
        return $out;
547
    }
548
549
    // --------------------------------------------------------------------
550
551
    /**
552
     * HTML Entity Decode Callback
553
     *
554
     * Used as a callback for XSS Clean
555
     *
556
     * @param    array
557
     * @return    string
558
     */
559 8
    protected function _decode_entity($match)
560
    {
561 8
        return $this->entity_decode($match[0]);
562
    }
563
564
    // --------------------------------------------------------------------
565
566
    /**
567
     * Validate URL entities
568
     *
569
     * Called by xss_clean()
570
     *
571
     * @param    string
572
     * @return    string
573
     */
574 16
    protected function _validate_entities($str)
575
    {
576
        /*
577
         * Protect GET variables in URLs
578
         */
579
580
        // 901119URL5918AMP18930PROTECT8198
581
582 16
        $str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);
583
584
        /*
585
         * Validate standard character entities
586
         *
587
         * Add a semicolon if missing.  We do this to enable
588
         * the conversion of entities to ASCII later.
589
         *
590
         */
591 16
        $str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);
592
593
        /*
594
         * Validate UTF16 two byte encoding (x00)
595
         *
596
         * Just as above, adds a semicolon if missing.
597
         *
598
         */
599 16
        $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i', "\\1\\2;", $str);
600
601
602 16
        return $str;
603
    }
604
605
    // ----------------------------------------------------------------------
606
607
    /**
608
     * Do Never Allowed
609
     *
610
     * A utility function for xss_clean()
611
     *
612
     * @param    string
613
     * @return    string
614
     */
615 16
    protected function _do_never_allowed($str)
616
    {
617 16
        $str = str_replace(array_keys($this->_never_allowed_str), $this->_never_allowed_str, $str);
618
619 16
        foreach ($this->_never_allowed_regex as $regex) {
620 16
            $str = preg_replace('#'.$regex.'#is', '[removed]', $str);
621
        }
622
623 16
        return $str;
624
    }
625
626
627 20
    protected function remove_invisible_characters($str, $url_encoded = true)
628
    {
629 20
        $non_displayables = [];
630
631
        // every control character except newline (dec 10)
632
        // carriage return (dec 13), and horizontal tab (dec 09)
633
634 20
        if ($url_encoded) {
635 16
            $non_displayables[] = '/%0[0-8bcef]/'; // url encoded 00-08, 11, 12, 14, 15
636 16
            $non_displayables[] = '/%1[0-9a-f]/'; // url encoded 16-31
637
        }
638
639 20
        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127
640
641
        do {
642 20
            $str = preg_replace($non_displayables, '', $str, -1, $count);
643 20
        } while ($count);
644
645 20
        return $str;
646
    }
647
648
    /**
649
     * Random Hash for protecting URLs
650
     *
651
     * @return    string
652
     */
653 16
    public function xss_hash()
654
    {
655 16
        if ($this->_xss_hash == '') {
656 16
            mt_srand();
657 16
            $this->_xss_hash = md5(time() + mt_rand(0, 1999999999));
658
        }
659
660 16
        return $this->_xss_hash;
661
    }
662
663
664 2
    public static function escapeLike($str, $escape = '\'\'')
665
    {
666
667 2
        return str_replace(
668 2
            ['%', '_', '\'', '"', '<', '>', '(', ')', '{', ']', ':', '/', '\\'],
669 2
            ['\%', '\_', $escape, '\"', '\<', '\>', '\(', '\)', '\{', '\}', '\:', '\/', '\\\\'], $str);
670
    }
671
}