1 | <?php |
||
2 | |||
3 | /* |
||
4 | * @copyright 2014 Mautic Contributors. All rights reserved |
||
5 | * @author Mautic |
||
6 | * |
||
7 | * @link http://mautic.org |
||
8 | * |
||
9 | * @license GNU/GPLv3 http://www.gnu.org/licenses/gpl-3.0.html |
||
10 | */ |
||
11 | |||
12 | namespace Mautic\CoreBundle\Helper; |
||
13 | |||
14 | use Joomla\Filter\InputFilter; |
||
15 | |||
16 | class InputHelper |
||
17 | { |
||
/**
 * Filter handed out when HTML was not requested.
 *
 * @var InputFilter
 */
private static $stringFilter;

/**
 * Filter for HTML input, configured with tag and attribute blacklists.
 *
 * @var InputFilter
 */
private static $htmlFilter;

/**
 * Filter for "strict" HTML, constructed with a short list of inline formatting tags.
 *
 * @var InputFilter
 */
private static $strictHtmlFilter;

/**
 * Returns one of the shared filter instances. All three are built together the
 * first time this method runs (detected through an empty $htmlFilter).
 *
 * @param bool $html   truthy to get an HTML filter, falsy to get the plain string filter
 * @param bool $strict only read when $html is truthy; truthy selects the strict HTML filter
 *
 * @return InputFilter
 */
private static function getFilter($html = false, $strict = false)
{
    if (empty(self::$htmlFilter)) {
        // Attributes refused by both HTML filters
        $blockedAttributes = ['codebase', 'dynsrc', 'lowsrc'];

        // Most of Mautic's HTML uses include full HTML documents so use blacklist method
        self::$htmlFilter               = new InputFilter([], [], 1, 1);
        self::$htmlFilter->tagBlacklist = [
            'applet',
            'bgsound',
            'base',
            'basefont',
            'embed',
            'frame',
            'frameset',
            'ilayer',
            'layer',
            'object',
        ];
        self::$htmlFilter->attrBlacklist = $blockedAttributes;

        // Strict HTML - basic one liner formatting really
        $inlineTags                            = ['b', 'i', 'u', 'em', 'strong', 'a', 'span'];
        self::$strictHtmlFilter                = new InputFilter($inlineTags, [], 0, 1);
        self::$strictHtmlFilter->attrBlacklist = $blockedAttributes;

        // Standard behavior if HTML is not specifically used
        self::$stringFilter = new InputFilter();
    }

    if (!$html) {
        return self::$stringFilter;
    }

    if ($strict) {
        return self::$strictHtmlFilter;
    }

    return self::$htmlFilter;
}
||
96 | |||
/**
 * Catches calls to static methods that are not defined on this class and forwards
 * them to the default string filter: the first argument is the value to clean and
 * the called method name is passed along as the filter type.
 *
 * @param string $name      name of the static method that was called
 * @param array  $arguments call arguments; only index 0 is read
 *
 * @return mixed
 */
public static function __callStatic($name, $arguments)
{
    $filter = self::getFilter();

    return $filter->clean($arguments[0], $name);
}
||
109 | |||
/**
 * Wrapper function to clean inputs.
 *
 * For a scalar $value, $mask names the cleaning method of this class to apply; when
 * no such method exists the value is passed to the default string filter with $mask
 * as the filter type.
 *
 * For an array $value, $mask may be either a single method name applied to every
 * element, or an array keyed by field name whose values are the method name (or a
 * nested mask array) to use for that field. Elements without a usable mask go
 * through the default string filter.
 *
 * @param mixed $value     scalar or (nested) array to clean
 * @param mixed $mask      method name, or array of field name => method name / nested mask
 * @param bool  $urldecode forwarded to the cleaning method
 *
 * @return mixed the cleaned value; arrays keep their keys
 */
public static function _($value, $mask = 'clean', $urldecode = false)
{
    if (!is_array($value)) {
        if (is_string($mask) && method_exists(self::class, $mask)) {
            return self::$mask($value, $urldecode);
        }

        return self::getFilter()->clean($value, $mask);
    }

    foreach ($value as $key => $item) {
        // Fallback when no cleaning method can be resolved for this element
        $useMask = 'filter';

        if (is_array($mask)) {
            if (array_key_exists($key, $mask)) {
                if (is_array($mask[$key])) {
                    // A nested mask can only be applied to a nested value. For a scalar
                    // value keep the fallback, otherwise the array would end up being
                    // used as a method name below.
                    if (is_array($item)) {
                        $useMask = $mask[$key];
                    }
                } elseif (is_string($mask[$key]) && method_exists(self::class, $mask[$key])) {
                    $useMask = $mask[$key];
                }
            } elseif (is_array($item)) {
                // Likely a collection so use the same mask
                $useMask = $mask;
            }
        } elseif (is_string($mask) && method_exists(self::class, $mask)) {
            $useMask = $mask;
        }

        if (is_array($item)) {
            $value[$key] = self::_($item, $useMask, $urldecode);
        } elseif ('filter' === $useMask) {
            $value[$key] = self::getFilter()->clean($item, $useMask);
        } else {
            $value[$key] = self::$useMask($item, $urldecode);
        }
    }

    return $value;
}
||
156 | |||
/**
 * HTML-escapes '"<>& and characters with ASCII value less than 32.
 * Arrays are processed element by element (recursively) and keep their keys.
 *
 * @param mixed      $value     string or (nested) array of strings
 * @param bool|false $urldecode when truthy each string is URL-decoded before escaping
 *
 * @return mixed|string
 */
public static function clean($value, $urldecode = false)
{
    if (!is_array($value)) {
        $subject = $urldecode ? urldecode($value) : $value;

        return filter_var($subject, FILTER_SANITIZE_SPECIAL_CHARS);
    }

    foreach ($value as $key => $item) {
        $value[$key] = self::clean($item, $urldecode);
    }

    return $value;
}
||
179 | |||
/**
 * Strips tags from a string while leaving quotes unencoded.
 *
 * NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1; it is kept
 * here because swapping it out would change the output.
 *
 * @param string     $value
 * @param bool|false $urldecode when truthy the value is URL-decoded first
 *
 * @return mixed
 */
public static function string($value, $urldecode = false)
{
    $subject = $urldecode ? urldecode($value) : $value;

    return filter_var($subject, FILTER_SANITIZE_STRING, FILTER_FLAG_NO_ENCODE_QUOTES);
}
||
196 | |||
/**
 * Strips non-alphanumeric characters.
 *
 * Only 0-9, a-z (case-insensitive) and the explicitly allowed characters survive;
 * the result is trimmed.
 *
 * @param string       $value
 * @param bool|false   $urldecode         when truthy the value is URL-decoded first
 * @param string|false $convertSpacesTo   when truthy, spaces are replaced by this string,
 *                                        which is then also treated as allowed
 * @param array        $allowedCharacters additional characters to keep
 *
 * @return string
 */
public static function alphanum($value, $urldecode = false, $convertSpacesTo = false, $allowedCharacters = [])
{
    if ($urldecode) {
        $value = urldecode($value);
    }

    if ($convertSpacesTo) {
        $value               = str_replace(' ', $convertSpacesTo, $value);
        $allowedCharacters[] = $convertSpacesTo;
    }

    $delimiter = '~';

    // Quote the allowed characters together with the delimiter: an allowed '~' would
    // otherwise terminate the pattern early and make the expression invalid.
    $allowed = empty($allowedCharacters) ? '' : preg_quote(implode('', $allowedCharacters), $delimiter);
    $regex   = $delimiter.'[^0-9a-z'.$allowed.']+'.$delimiter.'i';

    return trim(preg_replace($regex, '', $value));
}
||
231 | |||
/**
 * Returns a sanitized string which can be used in a file system.
 *
 * Spaces become underscores, the value is lower-cased, everything except
 * a-z, 0-9, dot, underscore and dash is removed, and one leading ".." is dropped.
 * The extension, when given, is appended after a dot as-is.
 *
 * @param string $value
 * @param string $extension
 *
 * @return string
 */
public static function filename($value, $extension = null)
{
    $lowered   = strtolower(str_replace(' ', '_', $value));
    $sanitized = preg_replace("/[^a-z0-9\.\_-]/", '', $lowered);
    $sanitized = preg_replace("/^\.\./", '', $sanitized);

    return null === $extension ? $sanitized : sprintf('%s.%s', $sanitized, $extension);
}
||
254 | |||
/**
 * Returns the value untouched, apart from optional URL-decoding.
 *
 * @param string     $value
 * @param bool|false $urldecode when truthy the value is URL-decoded
 *
 * @return string
 */
public static function raw($value, $urldecode = false)
{
    return $urldecode ? urldecode($value) : $value;
}
||
271 | |||
/**
 * Removes all characters except those allowed in URLs and rebuilds the URL from
 * its parsed components.
 *
 * While rebuilding, a missing or not-allowed scheme is replaced by the default
 * protocol, the listed query parameters are dropped and the fragment can be omitted.
 *
 * @param string     $value
 * @param bool|false $urldecode        when truthy the value is URL-decoded first
 * @param array|null $allowedProtocols defaults to https, http and ftp when empty
 * @param string|null $defaultProtocol defaults to http when empty
 * @param array      $removeQuery      names of query parameters to strip
 * @param bool|false $ignoreFragment   when truthy the #fragment is not re-attached
 *
 * @return mixed|string
 */
public static function url($value, $urldecode = false, $allowedProtocols = null, $defaultProtocol = null, $removeQuery = [], $ignoreFragment = false)
{
    if ($urldecode) {
        $value = urldecode($value);
    }

    if (empty($allowedProtocols)) {
        $allowedProtocols = ['https', 'http', 'ftp'];
    }
    if (empty($defaultProtocol)) {
        $defaultProtocol = 'http';
    }

    $value = filter_var($value, FILTER_SANITIZE_URL);
    $parts = parse_url($value);

    if (!empty($parts) && !empty($parts['path'])) {
        if (!isset($parts['scheme']) || !in_array($parts['scheme'], $allowedProtocols, true)) {
            $parts['scheme'] = $defaultProtocol;
        }

        if (!empty($removeQuery) && !empty($parts['query'])) {
            parse_str($parts['query'], $query);
            foreach ($removeQuery as $q) {
                unset($query[$q]);
            }
            $parts['query'] = http_build_query($query);
        }

        // user[:pass]@ - the '@' has to follow the user even when there is no password,
        // otherwise the user name would be glued onto the host.
        $user        = !empty($parts['user']) ? $parts['user'] : '';
        $pass        = !empty($parts['pass']) ? ':'.$parts['pass'] : '';
        $credentials = ('' !== $user || '' !== $pass) ? $user.$pass.'@' : '';

        $value =
            (!empty($parts['scheme']) ? $parts['scheme'].'://' : '').
            $credentials.
            (!empty($parts['host']) ? $parts['host'] : '').
            (!empty($parts['port']) ? ':'.$parts['port'] : '').
            $parts['path'].
            (!empty($parts['query']) ? '?'.$parts['query'] : '').
            (!$ignoreFragment && !empty($parts['fragment']) ? '#'.$parts['fragment'] : '');
    } else {
        // parse_url() failed or found no path, so just escape the value.
        // NOTE(review): URLs that have a host but no path (e.g. "scheme://host?query")
        // also land here, which means the protocol whitelist is not applied to them
        // and their query string gets HTML-escaped - confirm this is intended.
        $value = self::clean($value);
    }

    //since a URL allows <>, let's add a safety step to remove <script> tags
    $value = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $value);

    return $value;
}
||
338 | |||
/**
 * Removes all characters except those allowed in emails.
 *
 * The value is cut to 254 bytes, sanitized with FILTER_SANITIZE_EMAIL, any run of
 * consecutive dots is collapsed into a single dot and the result is trimmed.
 *
 * @param string     $value
 * @param bool|false $urldecode when truthy the value is URL-decoded first
 *
 * @return mixed
 */
public static function email($value, $urldecode = false)
{
    if ($urldecode) {
        $value = urldecode($value);
    }

    $value = substr($value, 0, 254);
    $value = filter_var($value, FILTER_SANITIZE_EMAIL);

    // A single str_replace('..', '.') pass would still leave '..' behind for three
    // or more dots in a row, so collapse the whole run at once.
    $value = preg_replace('/\.{2,}/', '.', (string) $value);

    return trim((string) $value);
}
||
359 | |||
/**
 * Runs the value through clean() and guarantees an array as the result:
 * empty results become an empty array, scalar results are wrapped in an array.
 *
 * @param mixed      $value
 * @param bool|false $urldecode forwarded to clean()
 *
 * @return array|mixed|string
 */
public static function cleanArray($value, $urldecode = false)
{
    $cleaned = self::clean($value, $urldecode);

    // Anything empty() considers empty collapses to an empty array
    if (empty($cleaned)) {
        return [];
    }

    return is_array($cleaned) ? $cleaned : [$cleaned];
}
||
384 | |||
/**
 * Returns clean HTML.
 *
 * Arrays are cleaned element by element (recursively). For a string, constructs
 * that should survive filtering (doctype, CDATA markers, conditional comment blocks,
 * <o:*>/<w:*>/<v:*> tags, attribute-less <script> blocks and HTML comments) are
 * first swapped for placeholder tags or remembered, the result is run through the
 * HTML filter, and the placeholders are then turned back into the original markup.
 *
 * @param mixed $value string or (nested) array of strings
 *
 * @return mixed|string
 */
public static function html($value)
{
    if (is_array($value)) {
        foreach ($value as $key => $item) {
            $value[$key] = self::html($item);
        }

        return $value;
    }

    // Remember the doctype so it can be put back in front of the cleaned value
    $hasDoctype = preg_match('/(<!DOCTYPE(.*?)>)/is', $value, $doctypeMatch);

    // CDATA markers become placeholder tags
    $value = str_replace(['<![CDATA[', ']]>'], ['<mcdata>', '</mcdata>'], $value, $cdataHits);

    // Conditional blocks become placeholder tags as well
    preg_match_all("/<!--\[if(.*?)\]>(.*?)(?:\<\!\-\-)?<!\[endif\]-->/is", $value, $conditionals);
    $hasConditionals = !empty($conditionals[0]);
    if ($hasConditionals) {
        $search  = [];
        $replace = [];
        foreach ($conditionals[0] as $index => $block) {
            if (false !== strpos($block, '<!--<![endif]-->')) {
                $open  = '<mconditionnonoutlook>';
                $close = '</mconditionnonoutlook>';
            } else {
                $open  = '<mcondition>';
                $close = '</mcondition>';
            }

            $search[]  = $block;
            $replace[] = $open.'<mif>'.$conditionals[1][$index].'</mif>'.$conditionals[2][$index].$close;
        }
        $value = str_replace($search, $replace, $value);
    }

    // XML tags used in Outlook optimized emails (<o:*/>, <w:*/>, <v:*/>) are entity-encoded inside a placeholder
    $value = preg_replace_callback(
        "/<\/*[o|w|v]:[^>]*>/is",
        static function ($hit) {
            return '<mencoded>'.htmlspecialchars($hit[0]).'</mencoded>';
        },
        $value,
        -1,
        $encodedHits
    );

    // Script blocks (only plain <script> without attributes match) are base64-encoded inside a placeholder
    $value = preg_replace_callback(
        "/<script>(.*?)<\/script>/is",
        static function ($hit) {
            return '<mscript>'.base64_encode($hit[0]).'</mscript>';
        },
        $value,
        -1,
        $scriptHits
    );

    // HTML comments become placeholder tags
    $value = str_replace(['<!-->', '<!--', '-->'], ['<mcomment></mcomment>', '<mcomment>', '</mcomment>'], $value, $commentHits);

    // Multi-byte content is detected by comparing byte lengths before/after utf8_decode()
    $hasUnicode = strlen($value) != strlen(utf8_decode($value));

    // Such content is raw-URL-encoded before it goes through the filter ...
    if ($hasUnicode) {
        $value = rawurlencode($value);
    }

    $value = self::getFilter(true)->clean($value, 'html');

    // ... and decoded again afterwards
    if ($hasUnicode) {
        $value = rawurldecode($value);
    }

    // Put everything back, starting with the doctype
    if ($hasDoctype) {
        $value = $doctypeMatch[0].$value;
    }

    if ($cdataHits) {
        $value = str_replace(['<mcdata>', '</mcdata>'], ['<![CDATA[', ']]>'], $value);
    }

    if ($hasConditionals) {
        $value = preg_replace("/<mconditionnonoutlook><mif>(.*?)<\/mif>(.*?)<\/mconditionnonoutlook>/is", '<!--[if$1]>$2<!--<![endif]-->', $value);
        $value = preg_replace("/<mcondition><mif>(.*?)<\/mif>(.*?)<\/mcondition>/is", '<!--[if$1]>$2<![endif]-->', $value);
    }

    if ($commentHits) {
        $value = str_replace(['<mcomment>', '</mcomment>'], ['<!--', '-->'], $value);
    }

    if ($encodedHits) {
        $value = preg_replace_callback(
            "/<mencoded>(.*?)<\/mencoded>/is",
            static function ($hit) {
                return htmlspecialchars_decode($hit[1]);
            },
            $value
        );
    }

    if ($scriptHits) {
        $value = preg_replace_callback(
            "/<mscript>(.*?)<\/mscript>/is",
            static function ($hit) {
                return base64_decode($hit[1]);
            },
            $value
        );
    }

    return $value;
}
||
491 | |||
/**
 * Cleans a value with the strict HTML filter.
 * Allows tags 'b', 'i', 'u', 'em', 'strong', 'a', 'span'.
 *
 * Arrays are cleaned element by element (recursively) and returned with their keys
 * intact; only non-array values are handed to the filter, the same way html()
 * treats arrays.
 *
 * @param mixed $value string or (nested) array of strings
 *
 * @return mixed|string
 */
public static function strict_html($value)
{
    if (is_array($value)) {
        foreach ($value as $key => $item) {
            $value[$key] = self::strict_html($item);
        }

        // Return here: the array itself must not be passed on to the filter below
        return $value;
    }

    return self::getFilter(true, true)->clean($value, 'html');
}
||
509 | |||
/**
 * Converts UTF8 into Latin.
 *
 * The intl Transliterator ('Any-Latin; Latin-ASCII') is used when the intl
 * function is available and the transliterator could be created; otherwise the
 * value is cast to string and handed to URLify.
 *
 * @param mixed $value
 *
 * @return mixed
 */
public static function transliterate($value)
{
    if (function_exists('transliterator_transliterate')) {
        // Use intl by default
        $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
        if ($transliterator) {
            return $transliterator->transliterate($value);
        }
    }

    return \URLify::transliterate((string) $value);
}
||
527 | } |
||
528 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.