1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* Copyright (C) 2024 Rafael San José <[email protected]> |
4
|
|
|
* |
5
|
|
|
* This program is free software; you can redistribute it and/or modify |
6
|
|
|
* it under the terms of the GNU General Public License as published by |
7
|
|
|
* the Free Software Foundation; either version 3 of the License, or |
8
|
|
|
* (at your option) any later version. |
9
|
|
|
* |
10
|
|
|
* This program is distributed in the hope that it will be useful, |
11
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
12
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13
|
|
|
* GNU General Public License for more details. |
14
|
|
|
* |
15
|
|
|
* You should have received a copy of the GNU General Public License |
16
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>. |
17
|
|
|
*/ |
18
|
|
|
|
19
|
|
|
namespace Dolibarr\Lib; |
20
|
|
|
|
21
|
|
|
use Exception; |
22
|
|
|
|
23
|
|
|
abstract class Filters
{
    /**
     * Remove emoji characters from a string (typically email content).
     *
     * Supported modes for $allowedemoji:
     *  - 0: remove every character from U+2600 up to U+10FFFF (large removal),
     *  - 1: remove only the ranges returned by getEmojis() (default),
     *  - 2: keep only the main known emojis (NOT implemented, text is returned unchanged),
     *  - 3: accept everything, text is returned unchanged.
     *
     * Note that to accept emoji in database, you must use utf8mb4, utf8mb3 is not enough.
     *
     * @param   string  $text           String to sanitize
     * @param   int     $allowedemoji   Mode to allow emoji (see above)
     * @return  string                  Sanitized string
     */
    public static function removeEmoji($text, $allowedemoji = 1)
    {
        if ($allowedemoji == 0) {
            // For a large removal: everything in the BMP from U+2600, then all supplementary planes.
            $text = preg_replace('/[\x{2600}-\x{FFFF}]/u', '', $text);
            $text = preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $text);
        }

        // Delete emoji chars with a regex
        // See https://www.unicode.org/emoji/charts/full-emoji-list.html
        if ($allowedemoji == 1) {
            foreach (self::getEmojis() as $valarray) {
                $text = preg_replace('/[\x{' . $valarray[0] . '}-\x{' . $valarray[1] . '}]/u', '', $text);
            }
        }

        // TODO Mode 2 (keep only the main known emojis) is not yet implemented.

        return $text;
    }

    /**
     * Return array of Emojis for miscellaneous use.
     *
     * Each entry is a pair (first code point, last code point) written in hexadecimal,
     * without any prefix, so it can be injected into a \x{....} regex escape or given to hexdec().
     *
     * @return array<string,array<string>>      Array of Emojis ranges in hexadecimal
     */
    private static function getEmojis()
    {
        // A former entry array('9989', '9989') was removed: 9989 is the DECIMAL value of U+2705
        // (white heavy check mark), but values here are read as hexadecimal, so it was matching the
        // CJK ideograph U+9989 instead. U+2705 is already covered by the Dingbats range below.
        return array(
            'misc' => array('2600', '26FF'),   // Miscellaneous Symbols
            'ding' => array('2700', '27BF'),   // Dingbats
            'vars' => array('FE00', 'FE0F'),   // Variation Selectors
            'pict' => array('1F300', '1F5FF'), // Miscellaneous Symbols and Pictographs
            'emot' => array('1F600', '1F64F'), // Emoticons
            'tran' => array('1F680', '1F6FF'), // Transport and Map Symbols
            'flag' => array('1F1E0', '1F1FF'), // Flags (note: may be 1F1E6 instead of 1F1E0)
            'supp' => array('1F900', '1F9FF'), // Supplemental Symbols and Pictographs
        );
    }

    /**
     * Return true if security check on parameters are OK, false otherwise.
     *
     * A scalar is tested directly with testSqlAndScriptInject(). An array is walked recursively and
     * both its keys and its values are tested. When an injection is found in an array, the HTTP
     * response code is set to 403, then either the request is stopped ($stopcode true) or false is returned.
     *
     * @param   string|array<string,string>     $var        Variable name
     * @param   int<0,2>                        $type       0=POST, 1=GET, 2=PHP_SELF
     * @param   int<0,1>                        $stopcode   0=No stop code, 1=Stop code (default) if injection found
     * @return  boolean                                     True if there is no injection.
     * @throws  Exception                                   Instead of exit, when running under PHPUnit
     */
    public static function analyseVarsForSqlAndScriptsInjection(&$var, $type, $stopcode = 1)
    {
        if (!is_array($var)) {
            return (static::testSqlAndScriptInject($var, $type) <= 0);
        }

        foreach ($var as $key => $value) {  // Warning, $key may also be used for attacks
            // Exclude check for some variable keys
            if ($type === 0 && defined('NOSCANPOSTFORINJECTION') && is_array(constant('NOSCANPOSTFORINJECTION')) && in_array($key, constant('NOSCANPOSTFORINJECTION'))) {
                continue;
            }

            if (static::analyseVarsForSqlAndScriptsInjection($key, $type, $stopcode) && static::analyseVarsForSqlAndScriptsInjection($value, $type, $stopcode)) {
                continue;
            }

            http_response_code(403);

            if (!$stopcode) {
                return false;
            }

            self::refuseRequestForInjection($type, $key, $value);
        }

        return true;
    }

    /**
     * Output the "access refused" message, log it, then stop the request.
     *
     * Note: No addition into security audit table is done because we don't want to execute code in such a case.
     * Detection of too many such requests can be done with a fail2ban rule on 403 error code or into the PHP server error log.
     *
     * @param   int                 $type       0=POST, 1=GET, 2=PHP_SELF
     * @param   int|string          $key        Key of the refused parameter
     * @param   mixed               $value      Value of the refused parameter
     * @return  never
     * @throws  Exception                       Instead of exit, when running under PHPUnit
     */
    private static function refuseRequestForInjection($type, $key, $value)
    {
        // Get remote IP: PS: We do not use getRemoteIP(), function is not yet loaded and we need a value that can't be spoofed
        $ip = (empty($_SERVER['REMOTE_ADDR']) ? 'unknown' : $_SERVER['REMOTE_ADDR']);

        // The value may be an array when it is the key that was refused: report its type instead of
        // giving a non string to htmlentities().
        $printablevalue = is_scalar($value) ? (string) $value : gettype($value);

        $errormessage = 'Access refused to ' . htmlentities($ip, ENT_COMPAT, 'UTF-8') . ' by SQL or Script injection protection in main.inc.php:analyseVarsForSqlAndScriptsInjection type=' . htmlentities((string) $type, ENT_COMPAT, 'UTF-8');
        // The param key is not added into the visible message to avoid text injection

        $errormessage2 = 'page=' . htmlentities((empty($_SERVER["REQUEST_URI"]) ? '' : $_SERVER["REQUEST_URI"]), ENT_COMPAT, 'UTF-8');
        $errormessage2 .= ' paramtype=' . htmlentities((string) $type, ENT_COMPAT, 'UTF-8');
        $errormessage2 .= ' paramkey=' . htmlentities((string) $key, ENT_COMPAT, 'UTF-8');
        $errormessage2 .= ' paramvalue=' . htmlentities($printablevalue, ENT_COMPAT, 'UTF-8');

        print $errormessage;
        print "<br>\n";
        print 'Try to go back, fix data of your form and resubmit it. You can contact also your technical support.';

        // Details are only sent inside a HTML comment
        print "\n" . '<!--' . "\n";
        print $errormessage2;
        print "\n" . '-->';

        // Keep only the first 2000 chars of the details for the log and the exception message
        $message = $errormessage . ' ' . substr($errormessage2, 0, 2000);

        // Add entry into the PHP server error log
        if (function_exists('error_log')) {
            error_log($message);
        }

        // Under PHPUnit we must not kill the process, so an exception is thrown instead
        if (class_exists('PHPUnit\Framework\TestSuite')) {
            throw new Exception("Security injection exception: $message");
        }

        exit;
    }

    /**
     * Security: WAF layer for SQL Injection and XSS Injection (scripts) protection (Filters on GET, POST, PHP_SELF).
     * Warning: Such a protection can't be enough. It is not reliable as it will always be possible to bypass this. Good protection can
     * only be guaranteed by escaping data during output.
     *
     * @param   string      $val        Brute value found into $_GET, $_POST or PHP_SELF
     * @param   int         $type       0=POST, 1=GET, 2=PHP_SELF, 3=GET without sql reserved keywords (the less tolerant test)
     * @return  int                     >0 if there is an injection, 0 if none
     */
    public static function testSqlAndScriptInject($val, $type)
    {
        // Decode string first because a lot of things are obfuscated by encoding or multiple encoding.
        // So &lt;svg o&#110;load='console.log(&quot;123&quot;)' become <svg onload='console.log("123")'
        // So "&colon;&apos;" become ":'" (due to ENT_HTML5)
        // So "&Tab;&NewLine;" become ""
        // So "&lpar;&rpar;" become "()"

        // Loop to decode until no more things to decode.
        do {
            $oldval = $val;
            $val = html_entity_decode($val, ENT_QUOTES | ENT_HTML5);    // Decode '&colon;', '&apos;', '&Tab;', '&NewLine', ...
            // Sometimes we have entities without the ; at end so html_entity_decode does not work but entities is still interpreted by browser.
            $val = preg_replace_callback(
                '/&#(x?[0-9][0-9a-f]+;?)/i',
                /**
                 * @param string[] $m
                 * @return string
                 */
                static function ($m) {
                    // Decode '&#110;', ...
                    return self::realCharForNumericEntities($m);
                },
                $val
            );

            // We clean html comments because some hacks try to obfuscate evil strings by inserting HTML comments. Example: on<!-- -->error=alert(1)
            $val = preg_replace('/<!--[^>]*-->/', '', $val);
            $val = preg_replace('/[\r\n\t]/', '', $val);
        } while ($oldval != $val);

        $inj = 0;

        // We check string because some hacks try to obfuscate evil strings by inserting non printable chars. Example: 'java(ascci09)scr(ascii00)ipt' is processed like 'javascript' (whatever is place of evil ascii char)
        // We should use dol_string_nounprintableascii but function is not yet loaded/available
        // Example of valid UTF8 chars:
        // utf8=utf8mb3:    '\x09', '\x0A', '\x0D', '\x7E'
        // utf8=utf8mb3:    '\xE0\xA0\x80'
        // utf8mb4:         '\xF0\x9D\x84\x9E'   (but this may be refused by the database insert if pagecode is utf8=utf8mb3)
        $newval = preg_replace('/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/u', '', $val); // /u operator makes UTF8 valid characters being ignored so are not included into the replace

        // Note that $newval may also be completely empty '' when non valid UTF8 are found.
        if ($newval != $val) {
            // If $val has changed after removing non valid UTF8 chars, it means we have an evil string.
            $inj += 1;
        }

        // For SQL Injection (only GET are used to scan for such injection strings)
        if ($type == 1 || $type == 3) {
            // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
            $inj += preg_match('/delete\s*from/i', $val);
            $inj += preg_match('/create\s*table/i', $val);
            $inj += preg_match('/insert\s*into/i', $val);
            $inj += preg_match('/select\s*from/i', $val);
            $inj += preg_match('/into\s*(outfile|dumpfile)/i', $val);
            $inj += preg_match('/user\s*\(/i', $val); // avoid to use function user() or mysql_user() that return current database login
            $inj += preg_match('/information_schema/i', $val); // avoid to use request that read information_schema database
            $inj += preg_match('/<svg/i', $val); // <svg can be allowed in POST
            $inj += preg_match('/update[^&=\w].*set.+=/i', $val); // the [^&=\w] test is to avoid error when request is like action=update&...set... or &updatemodule=...set...
            $inj += preg_match('/union.+select/i', $val);
        }
        if ($type == 3) {
            // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
            $inj += preg_match('/select|update|delete|truncate|replace|group\s*by|concat|count|from|union/i', $val);
        }
        if ($type != 2) {   // Not common key strings, so we can check them both on GET and POST
            $inj += preg_match('/updatexml\(/i', $val);
            $inj += preg_match('/(\.\.%2f)+/i', $val);
            $inj += preg_match('/\s@@/', $val);
        }

        // For XSS Injection done by closing textarea to execute content into a textarea field
        $inj += preg_match('/<\/textarea/i', $val);

        // For XSS Injection done by adding javascript with script
        // This is all cases a browser consider text is javascript:
        // When it found '<script', 'javascript:', '<style', 'onload\s=' on body tag, '="&' on a tag size with old browsers
        // All examples on page: http://ha.ckers.org/xss.html#XSScalc
        // More on https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
        $inj += preg_match('/<audio/i', $val);
        $inj += preg_match('/<embed/i', $val);
        $inj += preg_match('/<iframe/i', $val);
        $inj += preg_match('/<object/i', $val);
        $inj += preg_match('/<script/i', $val);
        $inj += preg_match('/Set\.constructor/i', $val); // ECMA script 6
        if (!defined('NOSTYLECHECK')) {
            $inj += preg_match('/<style/i', $val);
        }
        $inj += preg_match('/base\s+href/si', $val);
        $inj += preg_match('/=data:/si', $val);

        // Event handlers found directly into the value
        $inj += self::countDomEventHandlers($val);

        // We refuse html into html because some hacks try to obfuscate evil strings by inserting HTML into HTML. Example: <img on<a>error=alert(1) to bypass test on onerror
        $tmpval = preg_replace('/<[^<]+>/', '', $val);
        $inj += self::countDomEventHandlers($tmpval);

        // Refused string ':' encoded (no reason to have it encoded) to lock 'javascript:...'
        // The pattern targets the encoded forms only, a plain ':' must stay accepted.
        $inj += preg_match('/&#58;|&#0000058|&#x3A/i', $val);
        $inj += preg_match('/j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:/i', $val);
        $inj += preg_match('/vbscript\s*:/i', $val);

        // For XSS Injection done by adding javascript closing html tags like with onmousemove, etc... (closing a src or href tag with not cleaned param)
        if ($type == 1 || $type == 3) {
            $val = str_replace('enclosure="', 'enclosure=X', $val); // We accept enclosure=" for the export/import module
            $inj += preg_match('/"/i', $val); // We refused " in GET parameters value.
        }
        if ($type == 2) {
            $inj += preg_match('/[:;"\'<>\?\(\){}\$%]/', $val); // PHP_SELF is a file system (or url path without parameters). It can contains spaces.
        }

        return $inj;
    }

    /**
     * Count how many families of DOM event handler attributes (onload=, onclick=, onerror=, ...) are found into a string.
     *
     * List of dom events is on https://www.w3schools.com/jsref/dom_obj_event.asp and https://developer.mozilla.org/en-US/docs/Web/Events
     *
     * @param   string  $val    String to scan
     * @return  int             Number of patterns that matched (0 if none)
     */
    private static function countDomEventHandlers($val)
    {
        $patterns = array(
            // onmousexxx can be set on img or any html tag like <img title='...' onmouseover=alert(1)>
            '/on(mouse|drag|key|load|touch|pointer|select|transition)[a-z]*\s*=/i',
            '/on(abort|after|animation|auxclick|before|blur|cancel|canplay|canplaythrough|change|click|close|contextmenu|cuechange|copy|cut)[a-z]*\s*=/i',
            '/on(dblclick|drop|durationchange|emptied|end|ended|error|focus|focusin|focusout|formdata|gotpointercapture|hashchange|input|invalid)[a-z]*\s*=/i',
            '/on(lostpointercapture|offline|online|pagehide|pageshow)[a-z]*\s*=/i',
            '/on(paste|pause|play|playing|progress|ratechange|reset|resize|scroll|search|seeked|seeking|show|stalled|start|submit|suspend)[a-z]*\s*=/i',
            '/on(timeupdate|toggle|unload|volumechange|waiting|wheel)[a-z]*\s*=/i',
            // More not into the previous list
            '/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i',
        );

        $found = 0;
        foreach ($patterns as $pattern) {
            $found += preg_match($pattern, $val);
        }

        return $found;
    }

    /**
     * Return the real char for a numeric entities.
     * WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar.
     *
     * Entities that encode an ASCII letter (A-Z, a-z) are converted into the letter, entities inside
     * one of the ranges of getEmojis() are converted into the UTF-8 char, any other entity is returned unchanged.
     *
     * @param   array<int,string>   $matches    Array with a decimal numeric entity into key 0, value without the &# into the key 1
     * @return  string                          New value
     */
    public static function realCharForNumericEntities($matches)
    {
        // Remove the optional trailing ';'
        $newstringnumentity = preg_replace('/;$/', '', $matches[1]);

        if (stripos($newstringnumentity, 'x') === 0) {     // if numeric is hexadecimal
            $newstringnumentity = hexdec(substr($newstringnumentity, 1));
        } else {
            $newstringnumentity = (int) $newstringnumentity;
        }

        // The numeric values we don't want as entities because they encode ascii char, and why using html entities on ascii except for hacking ?
        if (($newstringnumentity >= 65 && $newstringnumentity <= 90) || ($newstringnumentity >= 97 && $newstringnumentity <= 122)) {
            return chr((int) $newstringnumentity);
        }

        // The numeric values we want in UTF8 instead of entities because it is emoji
        foreach (self::getEmojis() as $valarray) {
            if ($newstringnumentity >= hexdec($valarray[0]) && $newstringnumentity <= hexdec($valarray[1])) {
                // This is a known emoji
                return html_entity_decode($matches[0], ENT_COMPAT | ENT_HTML5, 'UTF-8');
            }
        }

        return '&#' . $matches[1]; // Value will be unchanged because regex was /&#(  )/
    }
}
324
|
|
|
|
In general, usage of exit should be done with care and only when running in a scripting context like a CLI script.