Test Setup Failed
Push — dev ( 608138...99eb65 )
by Rafael
61:41 queued 16s
created

Filters::testSqlAndScriptInject()   C

Complexity

Conditions 11
Paths 128

Size

Total Lines 128
Code Lines 67

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 11
eloc 67
nc 128
nop 2
dl 0
loc 128
rs 6.3865
c 1
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/* Copyright (C) 2024       Rafael San José             <[email protected]>
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 3 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
17
 */
18
19
namespace Dolibarr\Lib;
20
21
use Exception;
22
23
abstract class Filters
{
    /**
     * Remove emoji characters from a text (typically an email content).
     *
     * Note that to store emoji in database, the charset must be utf8mb4 (utf8mb3 is not enough).
     *
     * @param string $text         String to sanitize
     * @param int    $allowedemoji Mode to allow emoji:
     *                             0=remove every character from U+2600 to U+FFFF and from U+10000 to U+10FFFF,
     *                             1=remove only the ranges returned by getEmojis() (default),
     *                             2=keep only the main known emoji (not implemented: text is returned unchanged),
     *                             3=accept all (text is returned unchanged)
     * @return string|null         Sanitized string (null if preg_replace() fails, for example on invalid UTF-8)
     */
    public static function removeEmoji($text, $allowedemoji = 1)
    {
        if ($allowedemoji == 0) {
            // Large removal: everything above the Miscellaneous Symbols block.
            $text = preg_replace('/[\x{2600}-\x{FFFF}]/u', '', $text);
            $text = preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $text);
        }

        if ($allowedemoji == 1) {
            // Remove the known emoji ranges only.
            // See https://www.unicode.org/emoji/charts/full-emoji-list.html
            foreach (self::getEmojis() as $valarray) {
                $text = preg_replace('/[\x{' . $valarray[0] . '}-\x{' . $valarray[1] . '}]/u', '', $text);
            }
        }

        return $text;
    }

    /**
     * Return the array of emoji ranges for miscellaneous use.
     *
     * Each entry is a pair (first code point, last code point), both written in hexadecimal without prefix.
     *
     * @return  array<string,array<string>>         Array of emoji ranges in hexadecimal
     */
    private static function getEmojis()
    {
        return array(
            'misc' => array('2600', '26FF'),        // Miscellaneous Symbols
            'ding' => array('2700', '27BF'),        // Dingbats
            // White Heavy Check Mark. The value was previously '9989', which is the decimal form of U+2705;
            // read as hexadecimal it designated the CJK ideograph U+9989 instead of an emoji.
            '????' => array('2705', '2705'),
            'vars' => array('FE00', 'FE0F'),        // Variation Selectors
            'pict' => array('1F300', '1F5FF'),      // Miscellaneous Symbols and Pictographs
            'emot' => array('1F600', '1F64F'),      // Emoticons
            'tran' => array('1F680', '1F6FF'),      // Transport and Map Symbols
            'flag' => array('1F1E0', '1F1FF'),      // Flags (note: may be 1F1E6 instead of 1F1E0)
            'supp' => array('1F900', '1F9FF'),      // Supplemental Symbols and Pictographs
        );
    }

    /**
     * Return true if security check on parameters are OK, false otherwise.
     *
     * A scalar is tested directly with testSqlAndScriptInject(). An array is walked recursively and both
     * the keys and the values are tested. When an injection is found inside an array, the HTTP response code
     * is set to 403; then, if $stopcode is set, an error page is printed, the event is written to the PHP
     * error log and the script is stopped (an Exception is thrown instead when running under PHPUnit).
     *
     * @param string|array<string,string> $var Variable to check (a value, or an array of key => value)
     * @param int<0,2> $type 1=GET, 0=POST, 2=PHP_SELF
     * @param int<0,1> $stopcode 0=No stop code, 1=Stop code (default) if injection found
     * @return      boolean                     True if there is no injection.
     * @throws      Exception                   If an injection is found, $stopcode is set and PHPUnit is loaded
     */
    public static function analyseVarsForSqlAndScriptsInjection(&$var, $type, $stopcode = 1)
    {
        if (!is_array($var)) {
            return (static::testSqlAndScriptInject($var, $type) <= 0);
        }

        foreach ($var as $key => $value) {  // Warning, $key may also be used for attacks
            // Exclude check for some variable keys
            if ($type === 0 && defined('NOSCANPOSTFORINJECTION') && is_array(constant('NOSCANPOSTFORINJECTION')) && in_array($key, constant('NOSCANPOSTFORINJECTION'))) {
                continue;
            }

            if (static::analyseVarsForSqlAndScriptsInjection($key, $type, $stopcode) && static::analyseVarsForSqlAndScriptsInjection($value, $type, $stopcode)) {
                continue;
            }

            // An injection was found in the key or in the value.
            http_response_code(403);

            if (!$stopcode) {
                return false;
            }

            // Get remote IP: PS: We do not use getRemoteIP(), function is not yet loaded and we need a value that can't be spoofed
            $ip = (empty($_SERVER['REMOTE_ADDR']) ? 'unknown' : $_SERVER['REMOTE_ADDR']);

            // The key is deliberately not added to the visible message, to avoid text injection.
            $errormessage = 'Access refused to ' . htmlentities($ip, ENT_COMPAT, 'UTF-8') . ' by SQL or Script injection protection in main.inc.php:analyseVarsForSqlAndScriptsInjection type=' . htmlentities((string)$type, ENT_COMPAT, 'UTF-8');

            // $value is an array when the key itself is the malicious part of a nested parameter:
            // htmlentities() only accepts strings, so a non scalar value is reported by its type name.
            $printablevalue = is_scalar($value) ? (string)$value : gettype($value);

            $errormessage2 = 'page=' . htmlentities((empty($_SERVER["REQUEST_URI"]) ? '' : $_SERVER["REQUEST_URI"]), ENT_COMPAT, 'UTF-8');
            $errormessage2 .= ' paramtype=' . htmlentities((string)$type, ENT_COMPAT, 'UTF-8');
            $errormessage2 .= ' paramkey=' . htmlentities((string)$key, ENT_COMPAT, 'UTF-8');
            $errormessage2 .= ' paramvalue=' . htmlentities($printablevalue, ENT_COMPAT, 'UTF-8');

            print $errormessage;
            print "<br>\n";
            print 'Try to go back, fix data of your form and resubmit it. You can contact also your technical support.';

            // Details are only sent inside an HTML comment.
            print "\n" . '<!--' . "\n";
            print $errormessage2;
            print "\n" . '-->';

            // Keep at most the first 2000 characters of the details for the log and the exception message.
            $logmessage = $errormessage . ' ' . substr($errormessage2, 0, 2000);

            // Add entry into the PHP server error log
            if (function_exists('error_log')) {
                error_log($logmessage);
            }

            // Note: No addition into security audit table is done because we don't want to execute code in such a case.
            // Detection of too many such requests can be done with a fail2ban rule on 403 error code or into the PHP server error log.

            if (class_exists('PHPUnit\Framework\TestSuite')) {
                // Under PHPUnit, stopping the script would kill the test run: raise an exception instead.
                throw new Exception("Security injection exception: $logmessage");
            }

            // Hard stop on purpose: nothing else must be executed for a request refused by the filter.
            exit;
        }

        return true;
    }

    /**
     * Security: WAF layer for SQL Injection and XSS Injection (scripts) protection (Filters on GET, POST, PHP_SELF).
     * Warning: Such a protection can't be enough. It is not reliable as it will always be possible to bypass this. Good protection can
     * only be guaranteed by escaping data during output.
     *
     * @param string     $val  Brute value found into $_GET, $_POST or PHP_SELF
     * @param int|string $type 0=POST, 1=GET, 2=PHP_SELF, 3=GET without sql reserved keywords (the less tolerant test)
     * @return      int                     >0 if there is an injection, 0 if none
     */
    public static function testSqlAndScriptInject($val, $type)
    {
        // Decode string first because a lot of things are obfuscated by encoding or multiple encoding.
        // So <svg o&#110;load='console.log(&quot;123&quot;)' become <svg onload='console.log(&quot;123&quot;)'
        // So "&colon;&apos;" become ":'" (due to ENT_HTML5)
        // So "&Tab;&NewLine;" become ""
        // So "&lpar;&rpar;" become "()"

        // Loop to decode until no more things to decode.
        do {
            $oldval = $val;
            $val = html_entity_decode($val, ENT_QUOTES | ENT_HTML5);    // Decode '&colon;', '&apos;', '&Tab;', '&NewLine', ...
            // Sometimes we have entities without the ; at end so html_entity_decode does not work but entities is still interpreted by browser.
            $val = preg_replace_callback(
                '/&#(x?[0-9][0-9a-f]+;?)/i',
                /**
                 * @param string[] $m
                 * @return string
                 */
                static function ($m) {
                    // Decode '&#110;', ...
                    return Filters::realCharForNumericEntities($m);
                },
                $val
            );

            // We clean html comments because some hacks try to obfuscate evil strings by inserting HTML comments. Example: on<!-- -->error=alert(1)
            $val = preg_replace('/<!--[^>]*-->/', '', $val);
            $val = preg_replace('/[\r\n\t]/', '', $val);
        } while ($oldval !== $val);

        $inj = 0;

        // We check string because some hacks try to obfuscate evil strings by inserting non printable chars. Example: 'java(ascci09)scr(ascii00)ipt' is processed like 'javascript' (whatever is place of evil ascii char)
        // We should use dol_string_nounprintableascii but function is not yet loaded/available
        // Example of valid UTF8 chars:
        // utf8=utf8mb3:    '\x09', '\x0A', '\x0D', '\x7E'
        // utf8=utf8mb3:    '\xE0\xA0\x80'
        // utf8mb4:         '\xF0\x9D\x84\x9E'   (but this may be refused by the database insert if pagecode is utf8=utf8mb3)
        $newval = preg_replace('/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/u', '', $val); // /u operator makes UTF8 valid characters being ignored so are not included into the replace

        // Note that $newval is null when non valid UTF8 is found.
        // Strict comparison is required: with a loose one, two numeric strings such as "\x0B1" and "1" are
        // compared as numbers and are seen as equal, so the removed control char would go unnoticed.
        if ($newval !== $val) {
            // If $val has changed after removing non valid UTF8 chars, it means we have an evil string.
            $inj += 1;
        }

        // For SQL Injection (only GET are used to scan for such injection strings)
        if ($type == 1 || $type == 3) {
            // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
            $inj += self::countMatches(
                array(
                    '/delete\s*from/i',
                    '/create\s*table/i',
                    '/insert\s*into/i',
                    '/select\s*from/i',
                    '/into\s*(outfile|dumpfile)/i',
                    '/user\s*\(/i',                 // avoid to use function user() or mysql_user() that return current database login
                    '/information_schema/i',        // avoid to use request that read information_schema database
                    '/<svg/i',                      // <svg can be allowed in POST
                    '/update[^&=\w].*set.+=/i',     // the [^&=\w] test is to avoid error when request is like action=update&...set... or &updatemodule=...set...
                    '/union.+select/i',
                ),
                $val
            );
        }
        if ($type == 3) {
            // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
            $inj += preg_match('/select|update|delete|truncate|replace|group\s*by|concat|count|from|union/i', $val);
        }
        if ($type != 2) {   // Not common key strings, so we can check them both on GET and POST
            $inj += self::countMatches(
                array(
                    '/updatexml\(/i',
                    '/(\.\.%2f)+/i',
                    '/\s@@/',
                ),
                $val
            );
        }

        // For XSS Injection done by closing textarea to execute content into a textarea field
        $inj += preg_match('/<\/textarea/i', $val);

        // For XSS Injection done by adding javascript with script
        // This is all cases a browser consider text is javascript:
        // When it found '<script', 'javascript:', '<style', 'onload\s=' on body tag, '="&' on a tag size with old browsers
        // All examples on page: http://ha.ckers.org/xss.html#XSScalc
        // More on https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
        $inj += self::countMatches(
            array(
                '/<audio/i',
                '/<embed/i',
                '/<iframe/i',
                '/<object/i',
                '/<script/i',
                '/Set\.constructor/i',  // ECMA script 6
            ),
            $val
        );
        if (!defined('NOSTYLECHECK')) {
            $inj += preg_match('/<style/i', $val);
        }
        $inj += preg_match('/base\s+href/si', $val);
        $inj += preg_match('/=data:/si', $val);

        // DOM event handlers (onmousexxx can be set on img or any html tag like <img title='...' onmouseover=alert(1)>)
        $inj += self::countDomEventHandlers($val);

        // We refuse html into html because some hacks try to obfuscate evil strings by inserting HTML into HTML. Example: <img on<a>error=alert(1) to bypass test on onerror
        $tmpval = preg_replace('/<[^<]+>/', '', $val);
        $inj += self::countDomEventHandlers($tmpval);

        $inj += preg_match('/&#58;|&#0000058|&#x3A/i', $val); // refused string ':' encoded (no reason to have it encoded) to lock 'javascript:...'
        $inj += preg_match('/j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:/i', $val);
        $inj += preg_match('/vbscript\s*:/i', $val);

        // For XSS Injection done by adding javascript closing html tags like with onmousemove, etc... (closing a src or href tag with not cleaned param)
        if ($type == 1 || $type == 3) {
            $val = str_replace('enclosure="', 'enclosure=X', $val); // We accept enclosure=" for the export/import module
            $inj += preg_match('/"/i', $val); // We refused " in GET parameters value.
        }
        if ($type == 2) {
            $inj += preg_match('/[:;"\'<>\?\(\){}\$%]/', $val); // PHP_SELF is a file system (or url path without parameters). It can contains spaces.
        }

        return $inj;
    }

    /**
     * Count how many regex of a list match a string.
     *
     * @param string[]    $patterns List of PCRE patterns
     * @param string|null $val      String to test
     * @return int                  Number of patterns found in $val
     */
    private static function countMatches(array $patterns, $val)
    {
        $count = 0;
        foreach ($patterns as $pattern) {
            // preg_match() returns 1, 0 or false: false adds nothing to the counter.
            $count += preg_match($pattern, $val);
        }

        return $count;
    }

    /**
     * Count the groups of DOM event handler attributes (onload=, onerror=, ...) found in a string.
     * List of dom events is on https://www.w3schools.com/jsref/dom_obj_event.asp and https://developer.mozilla.org/en-US/docs/Web/Events
     *
     * @param string|null $val String to test
     * @return int             Number of event handler patterns found in $val
     */
    private static function countDomEventHandlers($val)
    {
        return self::countMatches(
            array(
                '/on(mouse|drag|key|load|touch|pointer|select|transition)[a-z]*\s*=/i',
                '/on(abort|after|animation|auxclick|before|blur|cancel|canplay|canplaythrough|change|click|close|contextmenu|cuechange|copy|cut)[a-z]*\s*=/i',
                '/on(dblclick|drop|durationchange|emptied|end|ended|error|focus|focusin|focusout|formdata|gotpointercapture|hashchange|input|invalid)[a-z]*\s*=/i',
                '/on(lostpointercapture|offline|online|pagehide|pageshow)[a-z]*\s*=/i',
                '/on(paste|pause|play|playing|progress|ratechange|reset|resize|scroll|search|seeked|seeking|show|stalled|start|submit|suspend)[a-z]*\s*=/i',
                '/on(timeupdate|toggle|unload|volumechange|waiting|wheel)[a-z]*\s*=/i',
                // More not into the previous list
                '/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i',
            ),
            $val
        );
    }

    /**
     * Return the real char for a numeric entities.
     * WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar.
     *
     * Entities encoding an ASCII letter (A-Z, a-z) are replaced by the letter, entities encoding a code point
     * inside one of the getEmojis() ranges are replaced by the UTF-8 character, any other entity is kept.
     *
     * @param array<int,string> $matches Array with the full numeric entity into key 0, value without the &# into the key 1
     * @return  string                                  New value
     */
    public static function realCharForNumericEntities($matches)
    {
        $newstringnumentity = preg_replace('/;$/', '', $matches[1]);

        if (preg_match('/^x/i', $newstringnumentity)) {     // if numeric is hexadecimal
            $newstringnumentity = hexdec(preg_replace('/^x/i', '', $newstringnumentity));
        } else {
            $newstringnumentity = (int)$newstringnumentity;
        }

        // The numeric values we don't want as entities because they encode ascii char, and why using html entities on ascii except for hacking ?
        if (($newstringnumentity >= 65 && $newstringnumentity <= 90) || ($newstringnumentity >= 97 && $newstringnumentity <= 122)) {
            return chr((int)$newstringnumentity);
        }

        // The numeric values we want in UTF8 instead of entities because it is emoji
        foreach (self::getEmojis() as $valarray) {
            if ($newstringnumentity >= hexdec($valarray[0]) && $newstringnumentity <= hexdec($valarray[1])) {
                // This is a known emoji
                return html_entity_decode($matches[0], ENT_COMPAT | ENT_HTML5, 'UTF-8');
            }
        }

        return '&#' . $matches[1]; // Value will be unchanged because regex was /&#(  )/
    }
}
324