FilterInput::remove()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 10
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
1
<?php
2
/*
3
 You may not change or alter any portion of this comment or credits
4
 of supporting developers from this source code or any supporting source code
5
 which is considered copyrighted (c) material of the original comment or credit authors.
6
7
 This program is distributed in the hope that it will be useful,
8
 but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10
 */
11
12
namespace Xmf;
13
14
/**
15
 * FilterInput is a class for filtering input from any data source
16
 *
17
 * Forked from the php input filter library by Daniel Morris
18
 *
19
 * Original Contributors: Gianpaolo Racca, Ghislain Picard,
20
 *                        Marco Wandschneider, Chris Tobin and Andrew Eddie.
21
 *
22
 * @category  Xmf\FilterInput
23
 * @package   Xmf
24
 * @author    Daniel Morris <[email protected]>
25
 * @author    Louis Landry <[email protected]>
26
 * @author    Grégory Mage (Aka Mage)
27
 * @author    trabis <[email protected]>
28
 * @author    Richard Griffith <[email protected]>
29
 * @copyright 2005 Daniel Morris
30
 * @copyright 2005 - 2013 Open Source Matters, Inc. All rights reserved.
31
 * @copyright 2011-2023 XOOPS Project (https://xoops.org)
32
 * @license   GNU GPL 2.0 or later (https://www.gnu.org/licenses/gpl-2.0.html)
33
 * @link      https://xoops.org
34
 */
35
class FilterInput
{
    /** @var array lower-cased user-defined tag names, used as white or black list per $tagsMethod */
    protected $tagsArray;
    /** @var array lower-cased user-defined attribute names, used as white or black list per $attrMethod */
    protected $attrArray;

    /** @var int 0 = allow only the tags in $tagsArray, 1 = allow all tags except those in $tagsArray */
    protected $tagsMethod;
    /** @var int 0 = allow only the attributes in $attrArray, 1 = allow all except those in $attrArray */
    protected $attrMethod;

    /** @var int when truthy, the built-in tag/attribute blacklists below are enforced as well */
    protected $xssAuto;

    /** @var array tag names always removed while $xssAuto is on */
    protected $tagBlacklist = array(
        'applet',
        'body',
        'bgsound',
        'base',
        'basefont',
        'embed',
        'frame',
        'frameset',
        'head',
        'html',
        'id',
        'iframe',
        'ilayer',
        'layer',
        'link',
        'meta',
        'name',
        'object',
        'script',
        'style',
        'title',
        'xml'
    );

    /** @var array attribute names always removed while $xssAuto is on (all on* event handlers are removed too) */
    protected $attrBlacklist = array('action', 'background', 'codebase', 'dynsrc', 'lowsrc');

    /**
     * Constructor
     *
     * Protected: instances are obtained through getInstance().
     *
     * @param array $tagsArray  - list of user-defined tags
     * @param array $attrArray  - list of user-defined attributes
     * @param int   $tagsMethod - 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $attrMethod - 0 = allow just user-defined, 1 = allow all but user-defined
     * @param int   $xssAuto    - 0 = only auto clean essentials, 1 = allow clean blacklisted tags/attr
     */
    protected function __construct(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        // Lower-case every entry regardless of how the array is keyed; the
        // lookups in filterTags()/filterAttr() compare against lower-cased names.
        $this->tagsArray  = array_map('strtolower', (array) $tagsArray);
        $this->attrArray  = array_map('strtolower', (array) $attrArray);
        $this->tagsMethod = $tagsMethod;
        $this->attrMethod = $attrMethod;
        $this->xssAuto    = $xssAuto;
    }

    /**
     * Returns an input filter object, only creating it if it does not already exist.
     *
     * This method must be invoked as:
     *   $filter = FilterInput::getInstance();
     *
     * One instance is cached per combination of called class and arguments.
     *
     * @param array $tagsArray  list of user-defined tags
     * @param array $attrArray  list of user-defined attributes
     * @param int   $tagsMethod WhiteList method = 0, BlackList method = 1
     * @param int   $attrMethod WhiteList method = 0, BlackList method = 1
     * @param int   $xssAuto    Only auto clean essentials = 0,
     *                          Allow clean blacklisted tags/attr = 1
     *
     * @return FilterInput object.
     */
    public static function getInstance(
        $tagsArray = array(),
        $attrArray = array(),
        $tagsMethod = 0,
        $attrMethod = 0,
        $xssAuto = 1
    ) {
        static $instances = array();

        $className = get_called_class(); // so an extender gets an instance of itself

        // the class name is part of the key, so subclasses never share an instance
        $sig = md5(serialize(array($className, $tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));

        if (!isset($instances[$sig])) {
            $instances[$sig] = new static($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
        }

        return $instances[$sig];
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * any specified bad code.
     *
     * Strings are entity-decoded and then stripped of unwanted tags and
     * attributes. For an array, each string element is cleaned the same way
     * (one level deep); non-string elements are left untouched. Any other
     * type is returned as given.
     *
     * @param mixed $source - input string/array-of-string to be 'cleaned'
     *
     * @return mixed 'cleaned' version of input parameter
     */
    public function process($source)
    {
        if (is_array($source)) {
            foreach ($source as $key => $value) {
                if (is_string($value)) {
                    $source[$key] = $this->remove($this->decode($value));
                }
            }

            return $source;
        }

        if (is_string($source)) {
            return $this->remove($this->decode($source));
        }

        // not something we know how to clean - return parameter as given
        return $source;
    }

    /**
     * Static method to be called by another php script.
     * Clean the supplied input using the default filter
     *
     * @param mixed  $source Input string/array-of-string to be 'cleaned'
     * @param string $type   Return/cleaning type for the variable, one of
     *                       (INTEGER, FLOAT, BOOLEAN, WORD, ALPHANUM, CMD, BASE64,
     *                        STRING, ARRAY, PATH, USERNAME, WEBURL, EMAIL, IP)
     *
     * @return mixed 'Cleaned' version of input parameter
     * @static
     */
    public static function clean($source, $type = 'string')
    {
        // getInstance() already caches the default filter per called class, so
        // no additional static cache is kept here (a method-level static would
        // be shared between a parent and its subclasses on newer PHP versions).
        return static::getInstance()->cleanVar($source, $type);
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * specified bad code according to rules supplied when this instance
     * was instantiated.
     *
     * @param mixed  $source Input string/array-of-string to be 'cleaned'
     * @param string $type   Return/cleaning type for the variable, one of
     *                       (INTEGER, FLOAT, BOOLEAN, WORD, ALPHANUM, CMD, BASE64,
     *                        STRING, ARRAY, PATH, USERNAME, WEBURL, EMAIL, IP)
     *                       Unrecognized types are handled by process().
     *
     * @return mixed 'Cleaned' version of input parameter
     */
    public function cleanVar($source, $type = 'string')
    {
        // Handle the type constraint
        switch (strtoupper($type)) {
            case 'INT':
            case 'INTEGER':
                // Only use the first integer value
                preg_match('/-?\d+/', (string) $source, $matches);
                $result = isset($matches[0]) ? (int) $matches[0] : 0;
                break;

            case 'FLOAT':
            case 'DOUBLE':
                // Only use the first floating point value
                preg_match('/-?\d+(\.\d+)?/', (string) $source, $matches);
                $result = isset($matches[0]) ? (float) $matches[0] : 0;
                break;

            case 'BOOL':
            case 'BOOLEAN':
                $result = (bool) $source;
                break;

            case 'WORD':
                $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
                break;

            case 'ALPHANUM':
            case 'ALNUM':
                $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
                break;

            case 'CMD':
                $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);
                $result = strtolower($result);
                break;

            case 'BASE64':
                $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
                break;

            case 'STRING':
                $result = (string) $this->process($source);
                break;

            case 'ARRAY':
                $result = (array) $this->process($source);
                break;

            case 'PATH':
                $source = trim((string) $source);
                $pattern = '/^([-_\.\/A-Z0-9=&%?~]+)(.*)$/i';
                preg_match($pattern, $source, $matches);
                $result = isset($matches[1]) ? (string) $matches[1] : '';
                break;

            case 'USERNAME':
                $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
                break;

            case 'WEBURL':
                // Browsers ignore whitespace around a URL, so it must be removed
                // before the scheme is inspected (" javascript:..." would
                // otherwise look like a relative path).
                $result = trim((string) $this->process($source));
                // allow only relative, http or https
                $urlparts = parse_url($result);
                $scheme = null;
                if (preg_match('#^([a-z][a-z0-9+.\-]*):#i', $result, $schemeMatch)) {
                    // Anything of the form "name:" at the start is a scheme to a
                    // browser, even where parse_url() reads it as host:port.
                    $scheme = strtolower($schemeMatch[1]);
                } elseif (is_array($urlparts) && !empty($urlparts['scheme'])) {
                    $scheme = strtolower($urlparts['scheme']);
                }
                if ($urlparts === false
                    || ($scheme !== null && $scheme !== 'http' && $scheme !== 'https')
                ) {
                    // unparsable, or an explicit scheme other than http(s)
                    $result = '';
                }
                // do not allow quotes, tag brackets or controls
                if (!preg_match('#^[^"<>\x00-\x1F]+$#', $result)) {
                    $result = '';
                }
                break;

            case 'EMAIL':
                $result = (string) $source;
                if (!filter_var((string) $source, FILTER_VALIDATE_EMAIL)) {
                    $result = '';
                }
                break;

            case 'IP':
                $result = (string) $source;
                // this may be too restrictive.
                // Should the FILTER_FLAG_NO_PRIV_RANGE flag be excluded?
                if (!filter_var((string) $source, FILTER_VALIDATE_IP)) {
                    $result = '';
                }
                break;

            default:
                $result = $this->process($source);
                break;
        }

        return $result;
    }

    /**
     * Internal method to iteratively remove all unwanted tags and attributes
     *
     * filterTags() is applied repeatedly until a pass leaves the string
     * unchanged, which provides nested-tag protection.
     *
     * @param String $source - input string to be 'cleaned'
     *
     * @return String $source - 'cleaned' version of input parameter
     */
    protected function remove($source)
    {
        do {
            $previous = $source;
            $source = $this->filterTags($previous);
        } while ($source !== $previous);

        return $source;
    }

    /**
     * Internal method to strip a string of certain tags
     *
     * Performs a single left-to-right pass. Each complete tag is either
     * dropped or rebuilt from its name plus the attributes that survive
     * filterAttr(). Text outside tags, and a trailing '<' that is never
     * closed, are copied through unchanged.
     *
     * @param String $source - input string to be 'cleaned'
     *
     * @return String $source - 'cleaned' version of input parameter
     */
    protected function filterTags($source)
    {
        // filter pass setup
        $preTag = '';         // output accumulated so far
        $postTag = $source;   // input still to be examined
        // find initial tag's position
        $tagOpen_start = strpos($source, '<');
        // iterate through string until no tags left
        while ($tagOpen_start !== false) {
            // copy the text in front of the tag, then look at the tag itself
            $preTag .= substr($postTag, 0, $tagOpen_start);
            $postTag = substr($postTag, $tagOpen_start);
            $fromTagOpen = substr($postTag, 1);
            // end of tag
            $tagOpen_end = strpos($fromTagOpen, '>');
            if ($tagOpen_end === false) {
                // unterminated tag - the remainder is appended as-is below
                break;
            }
            // next start of tag (for nested tag assessment)
            $tagOpen_nested = strpos($fromTagOpen, '<');
            if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
                // another '<' appears before the closing '>': emit everything up
                // to it and restart the scan at that inner '<'
                $preTag .= substr($postTag, 0, ($tagOpen_nested + 1));
                $postTag = substr($postTag, ($tagOpen_nested + 1));
                $tagOpen_start = strpos($postTag, '<');
                continue;
            }
            $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
            $tagLength = strlen($currentTag);
            // NOTE: an empty tag ("<>") needs no special handling here; it has
            // no tag name and is discarded by the name check below. Copying the
            // remaining input to the output at this point would duplicate it and
            // make every pass longer than the last.

            // iterate through tag finding attribute pairs - setup
            $tagLeft = $currentTag;
            $attrSet = array();
            $currentSpace = strpos($tagLeft, ' ');
            if (substr($currentTag, 0, 1) === "/") {
                // is end tag
                $isCloseTag = true;
                list($tagName) = explode(' ', $currentTag);
                $tagName = substr($tagName, 1);
            } else {
                // is start tag
                $isCloseTag = false;
                list($tagName) = explode(' ', $currentTag);
            }
            // excludes all "non-regular" tagnames OR no tagname OR remove if xssauto is on and tag is blacklisted
            if ((!preg_match("/^[a-z][a-z0-9]*$/i", $tagName))
                || (!$tagName)
                || ((in_array(strtolower($tagName), $this->tagBlacklist))
                    && ($this->xssAuto))
            ) {
                // skip past "<" + tag + ">"
                $postTag = substr($postTag, ($tagLength + 2));
                $tagOpen_start = strpos($postTag, '<');
                // don't append this tag
                continue;
            }
            // this while is needed to support attribute values with spaces in!
            while ($currentSpace !== false) {
                $fromSpace = substr($tagLeft, ($currentSpace + 1));
                $nextSpace = strpos($fromSpace, ' ');
                $openQuotes = strpos($fromSpace, '"');
                $closeQuotes = strpos(substr($fromSpace, ($openQuotes + 1)), '"') + $openQuotes + 1;
                // another equals exists
                if (strpos($fromSpace, '=') !== false) {
                    // opening and closing quotes exists
                    if (($openQuotes !== false)
                        && (strpos(substr($fromSpace, ($openQuotes + 1)), '"') !== false)
                    ) {
                        $attr = substr($fromSpace, 0, ($closeQuotes + 1));
                    } else {
                        // one or neither exist
                        $attr = substr($fromSpace, 0, $nextSpace);
                    }
                } else {
                    // no more equals exist
                    $attr = substr($fromSpace, 0, $nextSpace);
                }
                // last attr pair
                if (!$attr) {
                    $attr = $fromSpace;
                }
                // add to attribute pairs array
                $attrSet[] = $attr;
                // next inc
                $tagLeft = substr($fromSpace, strlen($attr));
                $currentSpace = strpos($tagLeft, ' ');
            }
            // appears in array specified by user
            $tagFound = in_array(strtolower($tagName), $this->tagsArray);
            // keep this tag on condition (white list hit, or black list miss)
            if ($tagFound !== (bool) $this->tagsMethod) {
                // reconstruct tag with allowed attributes
                if (!$isCloseTag) {
                    $attrSet = $this->filterAttr($attrSet);
                    $preTag .= '<' . $tagName;
                    foreach ($attrSet as $keptAttr) {
                        $preTag .= ' ' . $keptAttr;
                    }
                    // reformat single tags to XHTML
                    if (strpos($fromTagOpen, "</" . $tagName)) {
                        $preTag .= '>';
                    } else {
                        $preTag .= ' />';
                    }
                } else {
                    // just the tagname
                    $preTag .= '</' . $tagName . '>';
                }
            }
            // find next tag's start
            $postTag = substr($postTag, ($tagLength + 2));
            $tagOpen_start = strpos($postTag, '<');
        }
        // append any code after end of tags
        $preTag .= $postTag;

        return $preTag;
    }

    /**
     * Internal method to strip a tag of certain attributes
     *
     * @param array $attrSet attributes, each as a raw 'name', 'name=value' or 'name="value"' string
     *
     * @return array $newSet stripped attributes, each rebuilt as name="value"
     */
    protected function filterAttr($attrSet)
    {
        $newSet = array();
        // process attributes
        foreach ($attrSet as $rawAttr) {
            // skip blank spaces in tag
            if (!$rawAttr) {
                continue;
            }
            // split into attr name and value at the FIRST '=' only, so that
            // values which themselves contain '=' (query strings) stay intact
            $attrSubSet = explode('=', trim($rawAttr), 2);
            list($attrName) = explode(' ', $attrSubSet[0]);
            // a bare attribute such as "disabled" has no value part
            $attrValue = isset($attrSubSet[1]) ? $attrSubSet[1] : '';
            $attrNameLower = strtolower($attrName);

            // removes all "non-regular" attr names AND also attr blacklisted;
            // the name pattern is anchored so stray tokens such as "/" or
            // "/onerror" are rejected, and the event handler test ignores case
            if ((!preg_match('/^[a-z][a-z0-9_:.-]*$/i', $attrName))
                || (($this->xssAuto)
                    && ((in_array($attrNameLower, $this->attrBlacklist))
                        || (substr($attrNameLower, 0, 2) === 'on')))
            ) {
                continue;
            }
            // xss attr value filtering
            if ($attrValue) {
                // strips unicode, hex, etc
                $attrValue = str_replace('&#', '', $attrValue);
                // strip all whitespace (including newlines) within attr value
                $attrValue = preg_replace('/\s+/', '', $attrValue);
                // strip double quotes
                $attrValue = str_replace('"', '', $attrValue);
                // [requested feature] convert single quotes from either side to doubles
                // (Single quotes shouldn't be used to pad attr value)
                if ((substr($attrValue, 0, 1) === "'")
                    && (substr($attrValue, (strlen($attrValue) - 1), 1) === "'")
                ) {
                    $attrValue = substr($attrValue, 1, (strlen($attrValue) - 2));
                }
                // strip slashes
                $attrValue = stripslashes($attrValue);
            }
            // auto strip attr's with "javascript:" and similar script carriers
            $valueLower = strtolower((string) $attrValue);
            if (((strpos($valueLower, 'expression') !== false) && ($attrNameLower === 'style'))
                || (strpos($valueLower, 'javascript:') !== false)
                || (strpos($valueLower, 'behaviour:') !== false)
                || (strpos($valueLower, 'vbscript:') !== false)
                || (strpos($valueLower, 'mocha:') !== false)
                || (strpos($valueLower, 'livescript:') !== false)
            ) {
                continue;
            }

            // if matches user defined array
            $attrFound = in_array($attrNameLower, $this->attrArray);
            // keep this attr on condition (white list hit, or black list miss)
            if ($attrFound !== (bool) $this->attrMethod) {
                if ($attrValue) {
                    // attr has value
                    $newSet[] = $attrName . '="' . $attrValue . '"';
                } elseif ((string) $attrValue === '0') {
                    // attr has decimal zero as value
                    $newSet[] = $attrName . '="0"';
                } else {
                    // reformat single attributes to XHTML
                    $newSet[] = $attrName . '="' . $attrName . '"';
                }
            }
        }

        return $newSet;
    }

    /**
     * Try to convert to plaintext
     *
     * Decodes HTML entities, then converts any numeric character references
     * that are still present into the single byte with that value (values
     * above 255 wrap modulo 256).
     *
     * @param String $source string to decode
     *
     * @return String $source decoded
     */
    protected function decode($source)
    {
        // entity decode, using the site charset when one is defined
        $charset = defined('_CHARSET') ? constant('_CHARSET') : 'utf-8';
        $source = html_entity_decode($source, ENT_QUOTES, $charset);
        // convert decimal
        $source = preg_replace_callback(
            '/&#(\d+);/m',
            function ($matches) {
                // reduce to one byte explicitly rather than relying on chr()
                return chr(((int) $matches[1]) % 256);
            },
            $source
        );
        // convert hex notation
        $source = preg_replace_callback(
            '/&#x([a-f0-9]+);/mi',
            function ($matches) {
                // the last two hex digits are the value modulo 256, which keeps
                // hexdec() within integer range however long the digit run is
                return chr(hexdec(substr($matches[1], -2)));
            },
            $source
        );

        return $source;
    }
}
565