| Total Complexity | 40 |
| Total Lines | 299 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like Filters often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Filters, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 23 | abstract class Filters |
||
| 24 | { |
||
/**
 * Strip emoji characters from a text (typically email content).
 *
 * Note that to store emoji in the database you must use utf8mb4; utf8mb3 is not enough.
 *
 * @param   string  $text           String to sanitize
 * @param   int     $allowedemoji   Mode to allow emoji:
 *                                  0=no emoji (removes U+2600..U+FFFF and U+10000..U+10FFFF),
 *                                  1=exclude the main known emojis (default),
 *                                  2=keep only the main known (not implemented, text is returned as is),
 *                                  3=accept all (text is returned as is)
 * @return  string                  Sanitized string
 */
public static function removeEmoji($text, $allowedemoji = 1)
{
    // Character-class regexes to run, in the order they must be applied.
    $patterns = array();

    if ($allowedemoji == 0) {
        // Large removal: everything from U+2600 upwards, in two passes (BMP then supplementary planes).
        $patterns[] = '/[\x{2600}-\x{FFFF}]/u';
        $patterns[] = '/[\x{10000}-\x{10FFFF}]/u';
    }

    if ($allowedemoji == 1) {
        // Targeted removal: one regex per known emoji range.
        // See https://www.unicode.org/emoji/charts/full-emoji-list.html
        foreach (static::getEmojis() as $range) {
            $patterns[] = '/[\x{' . $range[0] . '}-\x{' . $range[1] . '}]/u';
        }
    }

    // Mode 2 is not implemented yet and mode 3 keeps everything: no pattern is registered for them.

    foreach ($patterns as $pattern) {
        $text = preg_replace($pattern, '', $text);
    }

    return $text;
}
||
| 60 | |||
/**
 * Return the Unicode ranges considered as "common emoji".
 *
 * Each entry is a pair array(first, last) of code points written as hexadecimal strings
 * without prefix, so they can be placed directly inside a PCRE \x{...} escape.
 *
 * @return array<string,array<string>>  Array of emoji ranges in hexadecimal
 */
private static function getEmojis()
{
    return array(
        'misc' => array('2600', '26FF'),    // Miscellaneous Symbols
        'ding' => array('2700', '27BF'),    // Dingbats
        // White Heavy Check Mark. Its decimal code is 9989, but values here are hexadecimal:
        // using '9989' targeted U+9989, a CJK ideograph, and removed it from legitimate text.
        // U+2705 is already inside the Dingbats range; the entry is kept explicit on purpose.
        'chck' => array('2705', '2705'),    // White Heavy Check Mark
        'vars' => array('FE00', 'FE0F'),    // Variation Selectors
        'pict' => array('1F300', '1F5FF'),  // Miscellaneous Symbols and Pictographs
        'emot' => array('1F600', '1F64F'),  // Emoticons
        'tran' => array('1F680', '1F6FF'),  // Transport and Map Symbols
        'flag' => array('1F1E0', '1F1FF'),  // Flags (note: may be 1F1E6 instead of 1F1E0)
        'supp' => array('1F900', '1F9FF'),  // Supplemental Symbols and Pictographs
    );
}
||
| 82 | |||
| 83 | /** |
||
| 84 | * Return true if security check on parameters are OK, false otherwise. |
||
| 85 | * |
||
| 86 | * @param string|array<string,string> $var Variable name |
||
| 87 | * @param int<0,2> $type 1=GET, 0=POST, 2=PHP_SELF |
||
| 88 | * @param int<0,1> $stopcode 0=No stop code, 1=Stop code (default) if injection found |
||
| 89 | * @return boolean True if there is no injection. |
||
| 90 | */ |
||
| 91 | public static function analyseVarsForSqlAndScriptsInjection(&$var, $type, $stopcode = 1) |
||
| 148 | } |
||
| 149 | |||
/**
 * Security: WAF layer for SQL Injection and XSS Injection (scripts) protection (Filters on GET, POST, PHP_SELF).
 * Warning: Such a protection can't be enough. It is not reliable as it will always be possible to bypass this. Good protection can
 * only be guaranteed by escaping data during output.
 *
 * The value is first decoded repeatedly (HTML entities, numeric entities, HTML comments, CR/LF/TAB) until it is stable,
 * then each suspicious pattern found adds to the returned counter.
 *
 * @param   string      $val    Raw value found in $_GET, $_POST or PHP_SELF
 * @param   int|string  $type   0=POST, 1=GET, 2=PHP_SELF, 3=GET without sql reserved keywords (the less tolerant test)
 * @return  int                 >0 if there is an injection, 0 if none
 */
public static function testSqlAndScriptInject($val, $type)
{
    // Decode string first because a lot of things are obfuscated by encoding or multiple encoding.
    // So <svg o&#110;load='console.log(&quot;123&quot;)' becomes <svg onload='console.log("123")'
    // So "&colon;&apos;" becomes ":'" (due to ENT_HTML5)
    // So "&Tab;&NewLine;" becomes "" (decoded, then removed with the CR/LF/TAB cleaning)
    // So "&lpar;&rpar;" becomes "()"

    // Loop to decode until there is nothing more to decode.
    do {
        $oldval = $val;
        $val = html_entity_decode($val, ENT_QUOTES | ENT_HTML5);   // Decode named entities (colon, apos, Tab, NewLine, ...)
        // Sometimes we have entities without the ; at end so html_entity_decode does not work but entities are still interpreted by browser.
        $val = preg_replace_callback(
            '/&#(x?[0-9][0-9a-f]+;?)/i',
            /**
             * @param string[] $m
             * @return string
             */
            static function ($m) {
                // Decode numeric entities (decimal or hexadecimal)
                return Filters::realCharForNumericEntities($m);
            },
            $val
        );

        // We clean html comments because some hacks try to obfuscate evil strings by inserting HTML comments. Example: on<!-- -->error=alert(1)
        $val = preg_replace('/<!--[^>]*-->/', '', $val);
        $val = preg_replace('/[\r\n\t]/', '', $val);
    } while ($oldval !== $val);

    $inj = 0;

    // We check string because some hacks try to obfuscate evil strings by inserting non printable chars. Example: 'java(ascci09)scr(ascii00)ipt' is processed like 'javascript' (whatever is place of evil ascii char)
    // We should use dol_string_nounprintableascii but function is not yet loaded/available
    // Example of valid UTF8 chars:
    // utf8=utf8mb3: '\x09', '\x0A', '\x0D', '\x7E'
    // utf8=utf8mb3: '\xE0\xA0\x80'
    // utf8mb4:      '\xF0\x9D\x84\x9E'   (but this may be refused by the database insert if pagecode is utf8=utf8mb3)
    $newval = preg_replace('/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/u', '', $val); // /u operator makes UTF8 valid characters being ignored so are not included into the replace

    // Note that $newval is not a string equal to $val when non valid UTF8 is found (preg_replace fails with /u).
    // The comparison must be strict: with a loose one, "\x0B1" and "1" are both numeric strings and compare as equal,
    // so a control char placed before a number was not detected.
    if ($newval !== $val) {
        // If $val has changed after removing non printable chars, it means we have an evil string.
        $inj += 1;
    }

    // For SQL Injection (only GET are used to scan for such injection strings)
    if ($type == 1 || $type == 3) {
        // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
        $inj += preg_match('/delete\s*from/i', $val);
        $inj += preg_match('/create\s*table/i', $val);
        $inj += preg_match('/insert\s*into/i', $val);
        $inj += preg_match('/select\s*from/i', $val);
        $inj += preg_match('/into\s*(outfile|dumpfile)/i', $val);
        $inj += preg_match('/user\s*\(/i', $val); // avoid to use function user() or mysql_user() that return current database login
        $inj += preg_match('/information_schema/i', $val); // avoid to use request that read information_schema database
        $inj += preg_match('/<svg/i', $val); // <svg can be allowed in POST
        $inj += preg_match('/update[^&=\w].*set.+=/i', $val); // the [^&=\w] test is to avoid error when request is like action=update&...set... or &updatemodule=...set...
        $inj += preg_match('/union.+select/i', $val);
    }
    if ($type == 3) {
        // Note the \s+ is replaced into \s* because some spaces may have been modified in previous loop
        $inj += preg_match('/select|update|delete|truncate|replace|group\s*by|concat|count|from|union/i', $val);
    }
    if ($type != 2) { // Not common key strings, so we can check them both on GET and POST
        $inj += preg_match('/updatexml\(/i', $val);
        $inj += preg_match('/(\.\.%2f)+/i', $val);
        $inj += preg_match('/\s@@/', $val);
    }

    // For XSS Injection done by closing textarea to execute content into a textarea field
    $inj += preg_match('/<\/textarea/i', $val);

    // For XSS Injection done by adding javascript with script
    // This is all cases a browser consider text is javascript:
    // When it found '<script', 'javascript:', '<style', 'onload\s=' on body tag, '="&' on a tag size with old browsers
    // All examples on page: http://ha.ckers.org/xss.html#XSScalc
    // More on https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
    $inj += preg_match('/<audio/i', $val);
    $inj += preg_match('/<embed/i', $val);
    $inj += preg_match('/<iframe/i', $val);
    $inj += preg_match('/<object/i', $val);
    $inj += preg_match('/<script/i', $val);
    $inj += preg_match('/Set\.constructor/i', $val); // ECMA script 6
    if (!defined('NOSTYLECHECK')) {
        $inj += preg_match('/<style/i', $val);
    }
    $inj += preg_match('/base\s+href/si', $val);
    $inj += preg_match('/=data:/si', $val);

    // DOM event handlers. onmousexxx can be set on img or any html tag like <img title='...' onmouseover=alert(1)>
    // List of dom events is on https://www.w3schools.com/jsref/dom_obj_event.asp and https://developer.mozilla.org/en-US/docs/Web/Events
    $eventpatterns = array(
        '/on(mouse|drag|key|load|touch|pointer|select|transition)[a-z]*\s*=/i',
        '/on(abort|after|animation|auxclick|before|blur|cancel|canplay|canplaythrough|change|click|close|contextmenu|cuechange|copy|cut)[a-z]*\s*=/i',
        '/on(dblclick|drop|durationchange|emptied|end|ended|error|focus|focusin|focusout|formdata|gotpointercapture|hashchange|input|invalid)[a-z]*\s*=/i',
        '/on(lostpointercapture|offline|online|pagehide|pageshow)[a-z]*\s*=/i',
        '/on(paste|pause|play|playing|progress|ratechange|reset|resize|scroll|search|seeked|seeking|show|stalled|start|submit|suspend)[a-z]*\s*=/i',
        '/on(timeupdate|toggle|unload|volumechange|waiting|wheel)[a-z]*\s*=/i',
        // More not into the previous list
        '/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i',
    );

    // We refuse html into html because some hacks try to obfuscate evil strings by inserting HTML into HTML.
    // Example: <img on<a>error=alert(1) to bypass test on onerror. So the same patterns are also tested on the value without its tags.
    $tmpval = preg_replace('/<[^<]+>/', '', $val);

    foreach (array($val, $tmpval) as $candidate) {
        foreach ($eventpatterns as $eventpattern) {
            $inj += preg_match($eventpattern, $candidate);
        }
    }

    // Refuse the character ':' when still found entity-encoded (decimal, zero padded decimal or hexadecimal form): there is
    // no reason to have it encoded, this is to lock 'javascript:...'. The '#' is escaped in the regex (same meaning for PCRE)
    // so the pattern can not be turned into a plain ':' test (that would match any value containing a colon) when this
    // source is rendered as HTML.
    $inj += preg_match('/&\#58;|&\#0000058|&\#x3A/i', $val);
    $inj += preg_match('/j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:/i', $val);
    $inj += preg_match('/vbscript\s*:/i', $val);

    // For XSS Injection done by adding javascript closing html tags like with onmousemove, etc... (closing a src or href tag with not cleaned param)
    if ($type == 1 || $type == 3) {
        $val = str_replace('enclosure="', 'enclosure=X', $val); // We accept enclosure=" for the export/import module
        $inj += preg_match('/"/i', $val); // We refused " in GET parameters value.
    }
    if ($type == 2) {
        $inj += preg_match('/[:;"\'<>\?\(\){}\$%]/', $val); // PHP_SELF is a file system (or url path without parameters). It can contains spaces.
    }

    return $inj;
}
||
| 288 | |||
| 289 | /** |
||
| 290 | * Return the real char for a numeric entities. |
||
| 291 | * WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar. |
||
| 292 | * |
||
| 293 | * @param array<int,string> $matches Array with a decimal numeric entity into key 0, value without the &# into the key 1 |
||
| 294 | * @return string New value |
||
| 295 | */ |
||
| 296 | public static function realCharForNumericEntities($matches) |
||
| 322 | } |
||
| 323 | } |
||
| 324 |
In general, usage of exit should be done with care and only when running in a scripting context like a CLI script.