Total Complexity | 40 |
Total Lines | 299 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like Filters often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Filters, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
23 | abstract class Filters |
||
24 | { |
||
/**
 * Remove emoji characters from a text (typically email content).
 *
 * Note that to store emoji in database, you must use utf8mb4; utf8mb3 is not enough.
 *
 * @param string $text          String to sanitize
 * @param int    $allowedemoji  Mode to allow emoji:
 *                              0=no emoji (every character from U+2600 to U+10FFFF is removed),
 *                              1=exclude the main known emojis (default),
 *                              2=keep only the main known (not implemented, text is returned unchanged),
 *                              3=accept all
 * @return string               Sanitized string ('' when the regex engine rejects the text, for example on malformed UTF-8)
 */
public static function removeEmoji($text, $allowedemoji = 1)
{
    if ($allowedemoji == 0) {
        // Large removal: everything above the Miscellaneous Symbols block, in the BMP and in the supplementary planes.
        $pattern = '/[\x{2600}-\x{FFFF}\x{10000}-\x{10FFFF}]/u';
    } elseif ($allowedemoji == 1) {
        // Delete emoji chars with a regex built from the list of common emoji ranges.
        // See https://www.unicode.org/emoji/charts/full-emoji-list.html
        $ranges = '';
        foreach (static::getEmojis() as $valarray) {
            $ranges .= '\x{' . $valarray[0] . '}-\x{' . $valarray[1] . '}';
        }
        if ($ranges === '') {
            // Nothing to remove (and '/[]/u' would not be a valid pattern).
            return $text;
        }
        $pattern = '/[' . $ranges . ']/u';
    } else {
        // Mode 2 is not yet implemented and mode 3 accepts everything: the text is left untouched.
        return $text;
    }

    // With the /u modifier, preg_replace() returns null when the subject is not valid UTF-8.
    // Return an empty string in that case instead of propagating null to the caller.
    $cleaned = preg_replace($pattern, '', $text);

    return ($cleaned === null) ? '' : $cleaned;
}
||
60 | |||
/**
 * Return the list of Unicode ranges treated as common emojis.
 *
 * Each entry is a pair (first code point, last code point). Both bounds are written in
 * hexadecimal, without any prefix, because the caller injects them into a \x{...} regex escape.
 *
 * @return array<string,array<string>>  Array of Emojis in hexadecimal
 */
private static function getEmojis()
{
    return array(
        'misc' => array('2600', '26FF'), // Miscellaneous Symbols
        'ding' => array('2700', '27BF'), // Dingbats
        // White heavy check mark. The value was previously written '9989', which is the DECIMAL code of U+2705;
        // read as hexadecimal it targeted the CJK ideograph U+9989 instead. Key kept for backward compatibility.
        '????' => array('2705', '2705'), // White Heavy Check Mark (already inside Dingbats)
        'vars' => array('FE00', 'FE0F'), // Variation Selectors
        'pict' => array('1F300', '1F5FF'), // Miscellaneous Symbols and Pictographs
        'emot' => array('1F600', '1F64F'), // Emoticons
        'tran' => array('1F680', '1F6FF'), // Transport and Map Symbols
        'flag' => array('1F1E0', '1F1FF'), // Flags (note: may be 1F1E6 instead of 1F1E0)
        'supp' => array('1F900', '1F9FF'), // Supplemental Symbols and Pictographs
    );
}
||
82 | |||
83 | /** |
||
84 | * Return true if the security checks on the parameters are OK, false otherwise. |
||
85 | * |
||
86 | * @param string|array<string,string> $var Variable name |
||
87 | * @param int<0,2> $type 1=GET, 0=POST, 2=PHP_SELF |
||
88 | * @param int<0,1> $stopcode 0=No stop code, 1=Stop code (default) if injection found |
||
89 | * @return boolean True if there is no injection. |
||
90 | */ |
||
91 | public static function analyseVarsForSqlAndScriptsInjection(&$var, $type, $stopcode = 1) |
||
148 | } |
||
149 | |||
/**
 * Security: WAF layer for SQL Injection and XSS Injection (scripts) protection (Filters on GET, POST, PHP_SELF).
 * Warning: Such a protection can't be enough. It is not reliable as it will always be possible to bypass this. Good protection can
 * only be guaranteed by escaping data during output.
 *
 * @param string     $val   Brute value found into $_GET, $_POST or PHP_SELF
 * @param int|string $type  0=POST, 1=GET, 2=PHP_SELF, 3=GET without sql reserved keywords (the less tolerant test)
 * @return int              >0 if there is an injection, 0 if none
 */
public static function testSqlAndScriptInject($val, $type)
{
    // Decode string first because a lot of things are obfuscated by encoding or multiple encoding.
    // So &lt;svg o&#110;load='console.log(&quot;123&quot;)' become <svg onload='console.log(&quot;123&quot;)'
    // So "&colon;&apos;" become ":'" (due to ENT_HTML5)
    // So "&Tab;&NewLine;" become ""
    // So "&lpar;&rpar;" become "()"

    // Loop to decode until no more things to decode.
    do {
        $oldval = $val;
        $val = html_entity_decode($val, ENT_QUOTES | ENT_HTML5); // Decode '&colon;', '&apos;', '&Tab;', '&NewLine', ...
        // Sometimes we have entities without the ; at end so html_entity_decode does not work but entities is still interpreted by browser.
        $val = preg_replace_callback(
            '/&#(x?[0-9][0-9a-f]+;?)/i',
            /**
             * @param string[] $m
             * @return string
             */
            static function ($m) {
                // Decode '&#110;', ...
                return Filters::realCharForNumericEntities($m);
            },
            $val
        );

        // We clean html comments because some hacks try to obfuscate evil strings by inserting HTML comments. Example: on<!-- -->error=alert(1)
        $val = preg_replace('/<!--[^>]*-->/', '', $val);
        $val = preg_replace('/[\r\n\t]/', '', $val);
    } while ($oldval != $val);

    $inj = 0;

    // We check string because some hacks try to obfuscate evil strings by inserting non printable chars. Example: 'java(ascci09)scr(ascii00)ipt' is processed like 'javascript' (whatever is place of evil ascii char)
    // We should use dol_string_nounprintableascii but function is not yet loaded/available
    // Example of valid UTF8 chars:
    // utf8=utf8mb3: '\x09', '\x0A', '\x0D', '\x7E'
    // utf8=utf8mb3: '\xE0\xA0\x80'
    // utf8mb4: '\xF0\x9D\x84\x9E' (but this may be refused by the database insert if pagecode is utf8=utf8mb3)
    $newval = preg_replace('/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/u', '', $val); // /u operator makes UTF8 valid characters being ignored so are not included into the replace

    // Note that $newval may also be completely empty '' when non valid UTF8 are found.
    if ($newval != $val) {
        // If $val has changed after removing non valid UTF8 chars, it means we have an evil string.
        $inj += 1;
    }

    // For SQL Injection (only GET are used to scan for such injection strings)
    if ($type == 1 || $type == 3) {
        // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
        $inj += preg_match('/delete\s*from/i', $val);
        $inj += preg_match('/create\s*table/i', $val);
        $inj += preg_match('/insert\s*into/i', $val);
        $inj += preg_match('/select\s*from/i', $val);
        $inj += preg_match('/into\s*(outfile|dumpfile)/i', $val);
        $inj += preg_match('/user\s*\(/i', $val); // avoid to use function user() or mysql_user() that return current database login
        $inj += preg_match('/information_schema/i', $val); // avoid to use request that read information_schema database
        $inj += preg_match('/<svg/i', $val); // <svg can be allowed in POST
        $inj += preg_match('/update[^&=\w].*set.+=/i', $val); // the [^&=\w] test is to avoid error when request is like action=update&...set... or &updatemodule=...set...
        $inj += preg_match('/union.+select/i', $val);
    }
    if ($type == 3) {
        // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
        $inj += preg_match('/select|update|delete|truncate|replace|group\s*by|concat|count|from|union/i', $val);
    }
    if ($type != 2) { // Not common key strings, so we can check them both on GET and POST
        $inj += preg_match('/updatexml\(/i', $val);
        $inj += preg_match('/(\.\.%2f)+/i', $val);
        $inj += preg_match('/\s@@/', $val);
    }
    // For XSS Injection done by closing textarea to execute content into a textarea field
    $inj += preg_match('/<\/textarea/i', $val);
    // For XSS Injection done by adding javascript with script
    // This is all cases a browser consider text is javascript:
    // When it found '<script', 'javascript:', '<style', 'onload\s=' on body tag, '="&' on a tag size with old browsers
    // All examples on page: http://ha.ckers.org/xss.html#XSScalc
    // More on https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
    $inj += preg_match('/<audio/i', $val);
    $inj += preg_match('/<embed/i', $val);
    $inj += preg_match('/<iframe/i', $val);
    $inj += preg_match('/<object/i', $val);
    $inj += preg_match('/<script/i', $val);
    $inj += preg_match('/Set\.constructor/i', $val); // ECMA script 6
    if (!defined('NOSTYLECHECK')) {
        $inj += preg_match('/<style/i', $val);
    }
    $inj += preg_match('/base\s+href/si', $val);
    $inj += preg_match('/=data:/si', $val);

    // DOM event handlers (onmousexxx can be set on img or any html tag like <img title='...' onmouseover=alert(1)>).
    // List of dom events is on https://www.w3schools.com/jsref/dom_obj_event.asp and https://developer.mozilla.org/en-US/docs/Web/Events
    $eventpatterns = array(
        '/on(mouse|drag|key|load|touch|pointer|select|transition)[a-z]*\s*=/i',
        '/on(abort|after|animation|auxclick|before|blur|cancel|canplay|canplaythrough|change|click|close|contextmenu|cuechange|copy|cut)[a-z]*\s*=/i',
        '/on(dblclick|drop|durationchange|emptied|end|ended|error|focus|focusin|focusout|formdata|gotpointercapture|hashchange|input|invalid)[a-z]*\s*=/i',
        '/on(lostpointercapture|offline|online|pagehide|pageshow)[a-z]*\s*=/i',
        '/on(paste|pause|play|playing|progress|ratechange|reset|resize|scroll|search|seeked|seeking|show|stalled|start|submit|suspend)[a-z]*\s*=/i',
        '/on(timeupdate|toggle|unload|volumechange|waiting|wheel)[a-z]*\s*=/i',
        // More not into the previous list
        '/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i',
    );

    // We refuse html into html because some hacks try to obfuscate evil strings by inserting HTML into HTML. Example: <img on<a>error=alert(1) to bypass test on onerror
    // So the event handler patterns are tested both on the value and on the value with its tags stripped.
    $tmpval = preg_replace('/<[^<]+>/', '', $val);
    foreach (array($val, $tmpval) as $candidate) {
        foreach ($eventpatterns as $eventpattern) {
            $inj += preg_match($eventpattern, $candidate);
        }
    }

    // Refuse ':' written as a numeric HTML entity (no reason to have it encoded) to lock 'javascript:...'.
    // The pattern must target the encoded forms only: a plain ':' is legitimate (URLs, hours, ...).
    $inj += preg_match('/&#58;|&#0000058|&#x3A/i', $val);
    $inj += preg_match('/j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:/i', $val);
    $inj += preg_match('/vbscript\s*:/i', $val);
    // For XSS Injection done by adding javascript closing html tags like with onmousemove, etc... (closing a src or href tag with not cleaned param)
    if ($type == 1 || $type == 3) {
        $val = str_replace('enclosure="', 'enclosure=X', $val); // We accept enclosure=" for the export/import module
        $inj += preg_match('/"/i', $val); // We refused " in GET parameters value.
    }
    if ($type == 2) {
        $inj += preg_match('/[:;"\'<>\?\(\){}\$%]/', $val); // PHP_SELF is a file system (or url path without parameters). It can contains spaces.
    }

    return $inj;
}
||
288 | |||
289 | /** |
||
290 | * Return the real char for a numeric entity. |
||
291 | * WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar. |
||
292 | * |
||
293 | * @param array<int,string> $matches Array with a decimal numeric entity into key 0, value without the &# into the key 1 |
||
294 | * @return string New value |
||
295 | */ |
||
296 | public static function realCharForNumericEntities($matches) |
||
322 | } |
||
323 | } |
||
324 |
In general, usage of exit should be done with care and only when running in a scripting context like a CLI script.