htmlfilter.php ➔ tln_fixstyle()   F
last analyzed

Complexity

Conditions 19
Paths 78

Size

Total Lines 126

Duplication

Lines 7
Ratio 5.56 %

Importance

Changes 0
Metric Value
cc 19
nc 78
nop 4
dl 7
loc 126
rs 3.6133
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * htmlfilter.inc
4
 * ---------------
5
 * This set of functions allows you to filter html in order to remove
6
 * any malicious tags from it. Useful in cases when you need to filter
7
 * user input for any cross-site-scripting attempts.
8
 *
9
 * Copyright (C) 2002-2004 by Duke University
10
 *
11
 * This library is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public
13
 * License as published by the Free Software Foundation; either
14
 * version 2.1 of the License, or (at your option) any later version.
15
 *
16
 * This library is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with this library; if not, write to the Free Software
23
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
24
 * 02110-1301  USA
25
 *
26
 * @Author  Konstantin Riabitsev <[email protected]>
27
 * @Author  Jim Jagielski <[email protected] / [email protected]>
28
 * @Version 1.1 ($Date$)
29
 * @param mixed $tagname
30
 * @param mixed $attary
31
 * @param mixed $tagtype
32
 */
33
34
/**
 * Builds the textual representation of a tag from its name, its
 * attributes and its type. Called internally by tln_sanitize.
 *
 * Attribute values are emitted verbatim as name=value, so they must
 * already carry whatever quoting they need.
 *
 * @param string $tagname the name of the tag.
 * @param array  $attary  attribute name => attribute value pairs; anything
 *                        that is not a non-empty array produces no attributes.
 * @param int    $tagtype 1 for an opening tag, 2 for a closing tag,
 *                        3 for an XHTML-style self-closing tag.
 * @return string the assembled tag.
 */
function tln_tagprint($tagname, $attary, $tagtype)
{
    // Closing tags never carry attributes, so they are done right away.
    if (2 == $tagtype) {
        return '</' . $tagname . '>';
    }

    $rendered = '<' . $tagname;
    if (is_array($attary) && count($attary)) {
        $pairs = [];
        foreach ($attary as $name => $value) {
            $pairs[] = "$name=$value";
        }
        $rendered .= ' ' . implode(' ', $pairs);
    }

    // Type 3 gets the " /" marker before the closing bracket.
    $rendered .= (3 == $tagtype) ? ' />' : '>';

    return $rendered;
}
65
66
/**
 * Lowercases a value in place. Intended as an array_walk callback.
 *
 * @param string $val the value to lowercase, passed by reference.
 */
function tln_casenormalize(&$val)
{
    $lowered = mb_strtolower($val);
    $val     = $lowered;
}
76
77
/**
 * Advances an offset past any run of whitespace that starts at that
 * offset within a string.
 *
 * @param string $body   the string
 * @param int    $offset the offset within the string where we should start
 *                       looking for the next non-whitespace character.
 * @return int           the location within $body of the next
 *                       non-whitespace character; $offset itself when the
 *                       character at $offset is not whitespace.
 */
function tln_skipspace($body, $offset)
{
    // "\s*" anchored at the start always matches (possibly an empty string),
    // so the length of the match is exactly the number of characters to skip.
    // The previous code called count() on the matched string, which throws a
    // TypeError on PHP 8.
    $matches = [];
    if (preg_match('/^\s*/s', mb_substr($body, $offset), $matches)) {
        $offset += mb_strlen($matches[0]);
    }

    return $offset;
}
97
98
/**
 * Looks for the next occurrence of a string within another string.
 * It is a "strpos" that reports "not found" as the length of the
 * haystack instead of false.
 *
 * @param string $body   The string to look for needle in.
 * @param int    $offset Start looking from this position.
 * @param string $needle The character/string to look for.
 * @return int           location of the next occurrence of the needle, or
 *                       mb_strlen($body) if needle wasn't found.
 */
function tln_findnxstr($body, $offset, $needle)
{
    $length = mb_strlen($body);

    // mb_strpos() throws a ValueError on PHP 8 when the offset lies outside
    // the string; treat that the same as "needle not found".
    if ($offset > $length || $offset < -$length) {
        return $length;
    }

    $pos = mb_strpos($body, $needle, $offset);

    return (false === $pos) ? $length : $pos;
}
118
119
/**
 * Takes a PCRE-style regexp fragment and tries to match it within the
 * string, starting at the given offset.
 *
 * The fragment is embedded in a pattern delimited by "%", so it must not
 * contain an unescaped "%".
 *
 * @param string $body   The string to look for needle in.
 * @param int    $offset Start looking from here.
 * @param string $reg    A PCRE-style regex to match.
 * @return array|bool  Returns false if no (non-empty) match was found, or an
 *                       array with the following members:
 *                       - integer with the location of the match within $body
 *                       - string with whatever content between offset and the match
 *                       - string with whatever it is we matched
 */
function tln_findnxreg($body, $offset, $reg)
{
    $matches   = [];
    $preg_rule = '%^(.*?)(' . $reg . ')%s';
    $found     = preg_match($preg_rule, mb_substr($body, $offset), $matches);

    // Decide on the preg_match() result and an explicit empty-string test.
    // A plain truthiness test would wrongly report "not found" when the
    // whole match is the string "0".
    if (1 !== $found || '' === $matches[0]) {
        return false;
    }

    return [
        $offset + mb_strlen($matches[1]),
        $matches[1],
        $matches[2],
    ];
}
148
149
/**
 * Looks for the next tag in a string and parses it.
 *
 * @param string $body   String where to look for the next tag.
 * @param int    $offset Start looking from here.
 * @return array|bool false if no more tags exist in the body, or
 *                       an array with the following members:
 *                       - string with the name of the tag
 *                       - array with attributes and their values
 *                         (false when the tag ends right after its name)
 *                       - integer with tag type (1, 2, or 3)
 *                       - integer where the tag starts (starting "<")
 *                       - integer where the tag ends (ending ">")
 *                       first three members will be false, if the tag is invalid.
 */
function tln_getnxtag($body, $offset)
{
    $bodylen = mb_strlen($body);
    if ($offset > $bodylen) {
        return false;
    }
    $lt = tln_findnxstr($body, $offset, '<');
    if ($lt == $bodylen) {
        return false;
    }

    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = tln_skipspace($body, $lt + 1);
    if ($pos >= $bodylen) {
        return [false, false, false, $lt, $bodylen];
    }

    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:            <a href="blah">
     * 2. Closing tag, e.g.:            </a>
     * 3. XHTML-style content-less tag: <img src="blah"/>
     */
    $lead = mb_substr($body, $pos, 1);
    if ('/' === $lead) {
        $tagtype = 2;
        $pos++;
    } elseif ('!' === $lead) {
        /**
         * A comment or an SGML declaration. Either way it is reported as
         * an invalid tag spanning up to its terminator.
         */
        if ('--' == mb_substr($body, $pos + 1, 2)) {
            $gt = mb_strpos($body, '-->', $pos);
            $gt = (false === $gt) ? $bodylen : $gt + 2;

            return [false, false, false, $lt, $gt];
        }

        return [false, false, false, $lt, tln_findnxstr($body, $pos, '>')];
    } else {
        /**
         * Assume tagtype 1 for now. If it turns out to be type 3, the
         * value is switched later.
         */
        $tagtype = 1;
    }

    /**
     * The first character that is not a word character, "-" or "_"
     * marks the end of the tag name.
     */
    $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
    if (false === $regary) {
        return [false, false, false, $lt, $bodylen];
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = mb_strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    if ('/' === $match) {
        /**
         * An xhtml-style tag such as <br/>. It is only valid when the
         * slash is immediately followed by the closing bracket.
         */
        if ('/>' != mb_substr($body, $pos, 2)) {
            return [false, false, false, $lt, tln_findnxstr($body, $pos, '>')];
        }
        $pos++;
        $tagtype = 3;

        return [$tagname, false, $tagtype, $lt, $pos];
    }
    if ('>' === $match) {
        return [$tagname, false, $tagtype, $lt, $pos];
    }
    if (!preg_match('/\s/', $match)) {
        /**
         * Not whitespace either: this is an invalid tag. Look for the
         * next closing ">", searching from the start of the tag.
         */
        return [false, false, false, $lt, tln_findnxstr($body, $lt, '>')];
    }

    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * Loop in order to collect all attributes.
     */
    $attary = [];

    while ($pos <= $bodylen) {
        $pos = tln_skipspace($body, $pos);
        if ($pos == $bodylen) {
            /**
             * Non-closed tag.
             */
            return [false, false, false, $lt, $pos];
        }

        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = [];
        if (preg_match('%^(\s*)(>|/>)%s', mb_substr($body, $pos), $matches)) {
            $pos += mb_strlen($matches[1]);
            if ('/>' == $matches[2]) {
                $tagtype = 3;
                $pos++;
            }

            return [$tagname, $attary, $tagtype, $lt, $pos];
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * Types 1 and 2 are kept as they are. For type 3 any '"' is
         * converted to "&quot;" and the value is wrapped in double
         * quotes. Type 4 becomes attrname="yes".
         */
        $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
        if (false === $regary) {
            /**
             * Looks like body ended before the end of tag.
             */
            return [false, false, false, $lt, $bodylen];
        }
        list($pos, $attname, $match) = $regary;
        $attname = mb_strtolower($attname);

        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        if ('/' === $match) {
            if ('/>' != mb_substr($body, $pos, 2)) {
                return [false, false, false, $lt, tln_findnxstr($body, $pos, '>')];
            }
            $pos++;
            $tagtype          = 3;
            $attary[$attname] = '"yes"';

            return [$tagname, $attary, $tagtype, $lt, $pos];
        }
        if ('>' === $match) {
            $attary[$attname] = '"yes"';

            return [$tagname, $attary, $tagtype, $lt, $pos];
        }

        /**
         * Skip whitespace and see what we arrive at. Two things are
         * valid here:
         * '=' means this is attribute type 1 2 or 3.
         * \w (or "/" or ">") means this was attribute type 4.
         * Anything else makes the tag invalid.
         */
        $pos  = tln_skipspace($body, $pos);
        $char = mb_substr($body, $pos, 1);

        if ('=' == $char) {
            $pos++;
            $pos = tln_skipspace($body, $pos);

            /**
             * Here are 3 possibilities:
             * "'"  attribute type 1
             * '"'  attribute type 2
             * everything else is the content of attribute type 3
             */
            $quot = mb_substr($body, $pos, 1);
            if ('\'' === $quot || '"' === $quot) {
                // Search for the matching closing quote after the opening one.
                $closer = ('\'' === $quot) ? '\'' : '\"';
                $regary = tln_findnxreg($body, $pos + 1, $closer);
                if (false === $regary) {
                    return [false, false, false, $lt, $bodylen];
                }
                list($pos, $attval, $match) = $regary;
                $pos++;
                $attary[$attname] = $quot . $attval . $quot;
            } else {
                /**
                 * Unquoted value: it runs up to the next \s or >.
                 */
                $regary = tln_findnxreg($body, $pos, '[\s>]');
                if (false === $regary) {
                    return [false, false, false, $lt, $bodylen];
                }
                list($pos, $attval, $match) = $regary;
                /**
                 * If it's ">" it will be caught at the top of the loop.
                 */
                $attval           = preg_replace('/\"/s', '&quot;', $attval);
                $attary[$attname] = '"' . $attval . '"';
            }
        } elseif (preg_match('|[\w/>]|', $char)) {
            /**
             * That was attribute type 4.
             */
            $attary[$attname] = '"yes"';
        } else {
            /**
             * An illegal character. Find next '>' and return.
             */
            return [false, false, false, $lt, tln_findnxstr($body, $pos, '>')];
        }
    }

    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return [false, false, false, $lt, $bodylen];
}
445
446
/**
 * Translates numeric entities into literal characters so the value can be
 * checked. Every match of $regex is replaced by the single character that
 * chr() yields for the number captured in the first group.
 *
 * @param string $attvalue the by-ref value to check; rewritten in place
 *                         when at least one match is found.
 * @param string $regex    the regular expression to check against; its
 *                         first capture group must hold the number.
 * @param bool   $hex      whether the captured numbers are hexadecimal.
 * @return bool            True or False depending on whether there were matches.
 */
function tln_deent(&$attvalue, $regex, $hex = false)
{
    preg_match_all($regex, $attvalue, $matches);
    if (!is_array($matches) || count($matches[0]) <= 0) {
        return false;
    }

    // Map each full entity text to its decoded character.
    $repl = [];
    foreach ($matches[0] as $idx => $entity) {
        $code          = $matches[1][$idx];
        $repl[$entity] = chr($hex ? hexdec($code) : $code);
    }
    $attvalue = strtr($attvalue, $repl);

    return true;
}
473
474
/**
 * Decodes entity-encoded and backslash-escaped characters in an attribute
 * value, in place, so that later checks run against the literal text.
 *
 * Decimal entities, hexadecimal entities and backslash-plus-digits escapes
 * (read as hexadecimal) are decoded repeatedly until a pass finds nothing
 * left to decode; finally stripslashes() is applied.
 *
 * @param string $attvalue A string to run entity check against (by-ref).
 */
function tln_defang(&$attvalue)
{
    /**
     * Nothing to decode unless there is an ampersand or a backslash.
     */
    $hasAmpersand = false !== mb_strpos($attvalue, '&');
    $hasBackslash = false !== mb_strpos($attvalue, '\\');
    if (!$hasAmpersand && !$hasBackslash) {
        return;
    }

    // Each pass stops at the first decoder that changed something and then
    // starts over, so nested encodings get unwrapped layer by layer.
    do {
        $changed = tln_deent($attvalue, '/\&#0*(\d+);*/s')
            || tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
            || tln_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($changed);

    $attvalue = stripslashes($attvalue);
}
498
499
/**
 * Removes every tab, carriage return, newline, NUL byte and space from an
 * attribute value, in place. Browsers have been known to treat
 * "java[tab]script" just like "javascript", so these characters are
 * stripped before the value is checked.
 *
 * @param string $attvalue The attribute value before extraneous spaces removed.
 */
function tln_unspace(&$attvalue)
{
    $unwanted = ["\t", "\r", "\n", "\0", ' '];

    // Leave the value untouched when the leading run that is free of these
    // characters already accounts for the whole length.
    if (strcspn($attvalue, implode('', $unwanted)) == mb_strlen($attvalue)) {
        return;
    }

    $attvalue = str_replace($unwanted, '', $attvalue);
}
512
513
/**
 * Runs the removal, rewriting and URL checks against the attributes of a
 * tag and returns the resulting attribute array.
 *
 * @param string $tagname         String with the name of the tag.
 * @param array  $attary          Array with all tag attributes (name => value).
 * @param array  $rm_attnames     Map of tag-name regex => list of
 *                                attribute-name regexes; a matching attribute
 *                                is dropped.
 * @param array  $bad_attvals     Map of tag-name regex => map of
 *                                attribute-name regex => [patterns, replacements],
 *                                fed to preg_replace() on the attribute value.
 * @param array  $add_attr_to_tag Map of tag-name regex => attributes that are
 *                                merged into the result for matching tags.
 * @param string $trans_image_path      Passed through to tln_fixurl().
 * @param bool   $block_external_images Passed through to tln_fixurl().
 * @return array with modified attributes.
 */
function tln_fixatts(
    $tagname,
    $attary,
    $rm_attnames,
    $bad_attvals,
    $add_attr_to_tag,
    $trans_image_path,
    $block_external_images)
{
    foreach ($attary as $attname => $attvalue) {
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr) {
                    if (preg_match($matchattr, $attname)) {
                        unset($attary[$attname]);
                        // Move on to the next attribute. A plain "continue"
                        // would only leave the innermost loop, and the
                        // checks below could then write the removed
                        // attribute back into $attary.
                        continue 3;
                    }
                }
            }
        }

        /**
         * Remove any backslashes, entities, or extraneous whitespace.
         */
        $oldattvalue = $attvalue;
        tln_defang($attvalue);
        if ('style' == $attname && $attvalue !== $oldattvalue) {
            // A style value that needed decoding is replaced wholesale.
            $attvalue         = 'idiocy';
            $attary[$attname] = $attvalue;
        }
        tln_unspace($attvalue);

        /**
         * Now run the configured match/replace rules on the attribute
         * value, for every rule whose tag and attribute regexes match.
         */
        foreach ($bad_attvals as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr => $valary) {
                    if (preg_match($matchattr, $attname)) {
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                        if ($newvalue != $attvalue) {
                            $attary[$attname] = $newvalue;
                            $attvalue         = $newvalue;
                        }
                    }
                }
            }
        }

        if ('style' == $attname) {
            if (preg_match('/[\0-\37\200-\377]+/', $attvalue)) {
                $attary[$attname] = '"disallowed character"';
                // Skip the url() rewrite below: it rebuilds the attribute
                // from the offending value and would undo this verdict.
                continue;
            }
            preg_match_all("/url\s*\((.+)\)/si", $attvalue, $aMatch);
            if (count($aMatch)) {
                foreach ($aMatch[1] as $sMatch) {
                    $urlvalue = $sMatch;
                    tln_fixurl($attname, $urlvalue, $trans_image_path, $block_external_images);
                    $attary[$attname] = str_replace($sMatch, $urlvalue, $attvalue);
                }
            }
        }
    }

    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag => $addattary) {
        if (preg_match($matchtag, $tagname)) {
            $attary = array_merge($attary, $addattary);
        }
    }

    return $attary;
}
609
610
/**
 * Validate a single URL value in place and neutralise anything suspicious.
 *
 * The value is trimmed and, when it starts with a single or double quote,
 * its first and last characters are stripped and the quote character is
 * remembered so that replacement values can be re-quoted the same way.
 *
 * Outcome by case:
 *  - empty value: replaced by the quoted placeholder image;
 *  - control or 8-bit characters present: replaced by a quoted dummy link
 *    for "href", otherwise by the quoted placeholder image;
 *  - mailto/http/https/ftp: for "href" the value is re-quoted and kept; for
 *    any other attribute it is replaced by the placeholder when external
 *    images are blocked or the URL has no path, and otherwise left as the
 *    trimmed, unquoted value;
 *  - outbind/cid: re-quoted and kept;
 *  - any other scheme: replaced by the quoted placeholder image;
 *  - no scheme: replaced by the quoted placeholder image unless the value
 *    already is the placeholder path.
 *
 * @param string $attname               name of the attribute the URL belongs to
 * @param string $attvalue              URL value, modified by reference
 * @param string $trans_image_path      path of the placeholder image
 * @param bool   $block_external_images whether remote images must be replaced
 */
function tln_fixurl($attname, &$attvalue, $trans_image_path, $block_external_images)
{
    $sQuote   = '"';
    $attvalue = trim($attvalue);
    if ($attvalue && ('"' == $attvalue[0] || "'" == $attvalue[0])) {
        // Remember the quote style and strip the surrounding pair.
        $sQuote   = $attvalue[0];
        $attvalue = trim(mb_substr($attvalue, 1, -1));
    }

    // Quoted placeholder used by every "reject" branch below.
    $sBlank = $sQuote . $trans_image_path . $sQuote;

    if ('' == $attvalue) {
        $attvalue = $sBlank;

        return;
    }

    // Disallow 8 bit characters and control characters.
    if (preg_match('/[\0-\37\200-\377]+/', $attvalue)) {
        if ('href' == $attname) {
            $attvalue = $sQuote . 'http://invalid-stuff-detected.example.com' . $sQuote;
        } else {
            $attvalue = $sBlank;
        }

        return;
    }

    $aUrl = parse_url($attvalue);
    if (!isset($aUrl['scheme'])) {
        // Scheme-less (relative or unparsable) URL: only the placeholder
        // itself may pass. The original code assigned to $$attvalue (a
        // variable variable), so this replacement silently never happened.
        if (!isset($aUrl['path']) || $aUrl['path'] != $trans_image_path) {
            $attvalue = $sBlank;
        }

        return;
    }

    switch (mb_strtolower($aUrl['scheme'])) {
        case 'mailto':
        case 'http':
        case 'https':
        case 'ftp':
            if ('href' != $attname) {
                if (true == $block_external_images || !isset($aUrl['path'])) {
                    $attvalue = $sBlank;
                }
                // Otherwise the trimmed value is kept as is (not re-quoted).
            } else {
                $attvalue = $sQuote . $attvalue . $sQuote;
            }
            break;
        case 'outbind':
        case 'cid':
            $attvalue = $sQuote . $attvalue . $sQuote;
            break;
        default:
            $attvalue = $sBlank;
            break;
    }
}
677
678
/**
 * Extract the contents of a <style> block from $body and filter them.
 *
 * Starting at $pos the body is scanned byte by byte until a closing
 * </style> tag is found. HTML comments are skipped, so a "</style>" placed
 * inside a comment does not terminate the block. The collected text then
 * has "body {...}" rules renamed to ".bodyclass", @import lines removed,
 * url(...) values passed through tln_fixurl(), and a list of dangerous
 * keywords neutralised.
 *
 * @param string $body                  complete HTML document being sanitized
 * @param int    $pos                   offset at which the style content starts
 * @param string $trans_image_path      path of the placeholder image
 * @param bool   $block_external_images whether remote images must be replaced
 *
 * @return array two elements: the filtered style text (false when no closing
 *               tag was found) and the offset at which parsing should resume
 */
function tln_fixstyle($body, $pos, $trans_image_path, $block_external_images)
{
    // workaround for </style> in between comments
    $content = '';
    $sToken  = '';
    $bEndTag = false;
    $newpos  = null; // stays null until the closing </style> has been seen

    // $body[$i] addresses bytes, so the bound and every offset helper used in
    // this loop must be byte based as well. Mixing in mb_strlen()/mb_strpos()
    // made the scan stop short (and miss </style>) on multibyte input.
    $iCount = strlen($body);
    for ($i = $pos; $i < $iCount; ++$i) {
        $char = $body[$i];
        switch ($char) {
            case '<':
                $sToken = $char;
                break;
            case '/':
                if ('<' == $sToken) {
                    $sToken  .= $char;
                    $bEndTag = true;
                } else {
                    $content .= $char;
                }
                break;
            case '>':
                if ($bEndTag) {
                    $sToken .= $char;
                    if (preg_match('/\<\/\s*style\s*\>/i', $sToken)) {
                        $newpos = $i + 1;
                        break 2;
                    }
                    // Some other closing tag: keep it as part of the content.
                    $content .= $sToken;

                    $bEndTag = false;
                } else {
                    $content .= $char;
                }
                break;
            case '!':
                if ('<' == $sToken) {
                    // possible comment
                    if (isset($body[$i + 2]) && '!--' == substr($body, $i, 3)) {
                        $i = strpos($body, '-->', $i + 3);
                        if (false === $i) { // no end comment
                            $i = $iCount;
                        }
                        $sToken = '';
                    }
                } else {
                    $content .= $char;
                }
                break;
            default:
                if ($bEndTag) {
                    $sToken .= $char;
                } else {
                    $content .= $char;
                }
                break;
        }
    }
    if (null === $newpos) {
        // Unterminated style block. The end position is deliberately still
        // reported with mb_strlen(), unchanged from before, because the
        // caller slices $body with mb_substr().
        return [false, mb_strlen($body)];
    }

    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", '.bodyclass\\1', $content);

    // first check for 8bit sequences and disallowed control characters
    if (preg_match('/[\16-\37\200-\377]+/', $content)) {
        $content = '<!-- style block removed by html filter due to presence of 8bit characters -->';

        return [$content, $newpos];
    }

    // remove @import line
    $content = preg_replace("/^\s*(@import.*)$/mi", "\n<!-- @import rules forbidden -->\n", $content);

    // Undo backslash-obfuscated spellings of "url" before looking for url(...).
    $content = preg_replace('/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i', 'url', $content);

    // NOTE(review): "(.+)" is greedy and "s" lets it span lines, so with
    // several url(...) declarations this captures everything between the
    // first "url(" and the last ")" as one value. Left unchanged here;
    // confirm whether a per-declaration match is wanted.
    preg_match_all("/url\s*\((.+)\)/si", $content, $aMatch);
    if (count($aMatch)) {
        $aValue = $aReplace = [];
        foreach ($aMatch[1] as $sMatch) {
            $urlvalue = $sMatch;
            tln_fixurl('style', $urlvalue, $trans_image_path, $block_external_images);
            $aValue[]   = $sMatch;
            $aReplace[] = $urlvalue;
        }
        $content = str_replace($aValue, $aReplace, $content);
    }

    /**
     * Remove any backslashes, entities, and extraneous whitespace.
     */
    $contentTemp = $content;
    tln_defang($contentTemp);
    tln_unspace($contentTemp);

    $match      = [
        '/\/\*.*\*\//',
        '/expression/i',
        '/behaviou*r/i',
        '/binding/i',
        '/include-source/i',
        '/javascript/i',
        '/script/i',
        '/position/i',
    ];
    $replace    = ['', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', ''];
    $contentNew = preg_replace($match, $replace, $contentTemp);
    // Only when a forbidden keyword was found in the cleaned-up copy is that
    // copy (with the keyword neutralised) used instead of the original text.
    if ($contentNew !== $contentTemp) {
        $content = $contentNew;
    }

    return [$content, $newpos];
}
810
811
/**
 * Build the attribute list for the <div> that stands in for a <body> tag.
 *
 * The result always carries class 'bodyclass'. The body attributes
 * "background", "bgcolor" and "text" are turned into an inline style; a
 * background image is always pointed at the placeholder image. When a
 * background colour is given without a text colour, black text is added so
 * the content stays readable.
 *
 * @param array  $attary           attributes of the original body tag
 * @param string $trans_image_path path of the placeholder image
 *
 * @return array attributes for the replacement div
 */
function tln_body2div($attary, $trans_image_path)
{
    $divattary = ['class' => "'bodyclass'"];
    if (!is_array($attary) || 0 === count($attary)) {
        return $divattary;
    }

    $css       = '';
    $sawBg     = false;
    $sawText   = false;
    $fallback  = '#000000';

    foreach ($attary as $name => $raw) {
        // The first character is taken to be the quote and removed everywhere.
        $value = str_replace(mb_substr($raw, 0, 1), '', $raw);

        if ('background' == $name) {
            $css .= "background-image: url('$trans_image_path'); ";
        } elseif ('bgcolor' == $name) {
            $sawBg = true;
            $css   .= "background-color: $value; ";
        } elseif ('text' == $name) {
            $sawText = true;
            $css     .= "color: $value; ";
        }
    }

    // Outlook defines a white bgcolor and no text color. This can lead to
    // white text on a white bg with certain themes.
    if ($sawBg && !$sawText) {
        $css .= "color: $fallback; ";
    }

    if ('' !== $css) {
        $divattary['style'] = '"' . $css . '"';
    }

    return $divattary;
}
850
851
/**
 * Walk through $body tag by tag and rebuild it from the parts considered safe.
 *
 * @param string   $body                  The HTML you wish to filter
 * @param array    $tag_list              first element is a flag, the rest are tag names:
 *                                        false means the listed tags are removed,
 *                                        true means only the listed tags are allowed
 * @param string[] $rm_tags_with_content  tags dropped together with everything up to
 *                                        their closing tag
 * @param string[] $self_closing_tags     tags that are printed as self-closing
 * @param bool     $force_tag_closing     append closing tags for elements left open
 * @param array    $rm_attnames           passed through to tln_fixatts()
 * @param array    $bad_attvals           passed through to tln_fixatts()
 * @param array    $add_attr_to_tag       passed through to tln_fixatts()
 * @param string   $trans_image_path      path of the placeholder image
 * @param bool     $block_external_images whether remote images must be replaced
 * @return string                         Sanitized html safe to show on your pages.
 */
function tln_sanitize(
    $body,
    $tag_list,
    $rm_tags_with_content,
    $self_closing_tags,
    $force_tag_closing,
    $rm_attnames,
    $bad_attvals,
    $add_attr_to_tag,
    $trans_image_path,
    $block_external_images)
{
    /**
     * Normalize rm_tags and rm_tags_with_content.
     */
    $rm_tags = array_shift($tag_list);
    // tln_casenormalize is defined elsewhere in this file; presumably it
    // normalises the case of every entry in place.
    @array_walk($tag_list, 'tln_casenormalize');
    @array_walk($rm_tags_with_content, 'tln_casenormalize');
    @array_walk($self_closing_tags, 'tln_casenormalize');
    /**
     * See if tag_list is of tags to remove or tags to allow.
     * false  means remove these tags
     * true   means allow these tags
     */
    $curpos       = 0;
    $open_tags    = [];
    $trusted      = "<!-- begin tln_sanitized html -->\n";
    // Either false, or the name of the removed tag whose content is being skipped.
    $skip_content = false;
    /**
     * Take care of netscape's stupid javascript entities like
     * &{alert('boo')};
     */
    $body = preg_replace('/&(\{.*?\};)/si', '&amp;\\1', $body);
    while (false != ($curtag = tln_getnxtag($body, $curpos))) {
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        $free_content = mb_substr($body, $curpos, $lt - $curpos);
        /**
         * Take care of <style>
         */
        if ('style' == $tagname && 1 == $tagtype) {
            // Emit the text that precedes the <style> tag. It used to be
            // overwritten by the style content below and was silently lost.
            if (false == $skip_content) {
                $trusted .= $free_content;
            }
            list($style_content, $curpos) = tln_fixstyle($body, $gt + 1, $trans_image_path, $block_external_images);
            if (false != $style_content) {
                if (!empty($attary)) {
                    $attary = tln_fixatts($tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $trans_image_path, $block_external_images);
                }
                $trusted .= tln_tagprint($tagname, $attary, $tagtype);
                $trusted .= $style_content;
                $trusted .= tln_tagprint($tagname, null, 2);
            }
            continue;
        }
        if (false == $skip_content) {
            $trusted .= $free_content;
        }
        if (false != $tagname) {
            if (2 == $tagtype) {
                // Closing tag.
                if ($skip_content == $tagname) {
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname      = false;
                    $skip_content = false;
                } elseif (false == $skip_content) {
                    if ('body' == $tagname) {
                        $tagname = 'div';
                    }
                    // Only print closing tags that match a tag we opened.
                    if (isset($open_tags[$tagname])
                        && $open_tags[$tagname] > 0) {
                        $open_tags[$tagname]--;
                    } else {
                        $tagname = false;
                    }
                }
            } elseif (false == $skip_content) {
                /**
                 * See if this is a self-closing type and change
                 * tagtype appropriately.
                 */
                if (1 == $tagtype
                    && in_array($tagname, $self_closing_tags, true)) {
                    $tagtype = 3;
                }
                /**
                 * See if we should skip this tag and any content
                 * inside it.
                 */
                if (1 == $tagtype
                    && in_array($tagname, $rm_tags_with_content, true)) {
                    $skip_content = $tagname;
                } elseif ((false == $rm_tags
                           && in_array($tagname, $tag_list, true))
                          || (true == $rm_tags
                              && !in_array($tagname, $tag_list, true))) {
                    $tagname = false;
                } else {
                    /**
                     * Convert body into div.
                     */
                    if ('body' == $tagname) {
                        $tagname = 'div';
                        $attary  = tln_body2div($attary, $trans_image_path);
                    }
                    if (1 == $tagtype) {
                        if (isset($open_tags[$tagname])) {
                            $open_tags[$tagname]++;
                        } else {
                            $open_tags[$tagname] = 1;
                        }
                    }
                    /**
                     * This is where we run other checks.
                     */
                    if (is_array($attary) && count($attary) > 0) {
                        $attary = tln_fixatts($tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $trans_image_path, $block_external_images);
                    }
                }
            }
            if (false != $tagname && false == $skip_content) {
                $trusted .= tln_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt + 1;
    }
    // Whatever follows the last tag.
    $trusted .= mb_substr($body, $curpos, mb_strlen($body) - $curpos);
    if (true == $force_tag_closing) {
        foreach ($open_tags as $tagname => $opentimes) {
            while ($opentimes > 0) {
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end tln_sanitized html -->\n";

    return $trusted;
}
1010
1011
//
1012
// Use the nifty htmlfilter library
1013
//
1014
1015
/**
 * Sanitize an HTML document with the default filter rules of this library.
 *
 * Sets up the tag lists, the attribute-name blacklist, the attribute-value
 * rewrite rules and the attributes to add, then hands everything to
 * tln_sanitize().
 *
 * @param string $body                  HTML to filter
 * @param string $trans_image_path      path of the placeholder image that replaces
 *                                      rejected image URLs
 * @param bool   $block_external_images when true, http(s) image URLs are replaced
 *                                      by the placeholder image as well
 *
 * @return string the sanitized HTML
 */
function HTMLFilter($body, $trans_image_path, $block_external_images = false)
{
    // Leading false: the tags listed here are removed, all others are kept.
    $tag_list = [
        false,
        'object',
        'meta',
        'html',
        'head',
        'base',
        'link',
        'frame',
        'iframe',
        'plaintext',
        'marquee',
    ];

    $rm_tags_with_content = [
        'script',
        'applet',
        'embed',
        'title',
        'frameset',
        'xmp',
        'xml',
    ];

    $self_closing_tags = [
        'img',
        'br',
        'hr',
        'input',
        'outbind',
    ];

    $force_tag_closing = true;

    $rm_attnames = [
        '/.*/' => [
            // "/target/i",
            '/^on.*/i',
            '/^dynsrc/i',
            '/^data.*/i',
            '/^lowsrc.*/i',
        ],
    ];

    // The image path is spliced into preg_replace() replacement strings.
    // Escape "\" and "$" so it is always taken literally, and reference the
    // groups as ${n}: with "\\1" a path starting with a digit (e.g.
    // "1x1.gif") would be read as backreference \11 and lose its quote and
    // first character.
    $safe_image_path = strtr($trans_image_path, ['\\' => '\\\\', '$' => '\\$']);
    $image_in_quotes = '${1}' . $safe_image_path . '${2}';

    $bad_attvals = [
        '/.*/' => [
            '/^src|background/i' => [
                [
                    '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                    '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                    '/^([\'"])\s*about\s*:.*([\'"])/si',
                ],
                [
                    $image_in_quotes,
                    $image_in_quotes,
                    $image_in_quotes,
                ],
            ],
            '/^href|action/i'    => [
                [
                    '/^([\'"])\s*\S+script\s*:.*([\'"])/si',
                    '/^([\'"])\s*mocha\s*:*.*([\'"])/si',
                    '/^([\'"])\s*about\s*:.*([\'"])/si',
                ],
                [
                    '\\1#\\1',
                    '\\1#\\1',
                    '\\1#\\1',
                ],
            ],
            '/^style/i'          => [
                [
                    "/\/\*.*\*\//",
                    '/expression/i',
                    '/binding/i',
                    '/behaviou*r/i',
                    '/include-source/i',
                    '/position\s*:/i',
                    '/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i',
                    '/url\s*\(\s*([\'"])\s*\S+script\s*:.*([\'"])\s*\)/si',
                    '/url\s*\(\s*([\'"])\s*mocha\s*:.*([\'"])\s*\)/si',
                    '/url\s*\(\s*([\'"])\s*about\s*:.*([\'"])\s*\)/si',
                    '/(.*)\s*:\s*url\s*\(\s*([\'"]*)\s*\S+script\s*:.*([\'"]*)\s*\)/si',
                ],
                [
                    '',
                    'idiocy',
                    'idiocy',
                    'idiocy',
                    'idiocy',
                    'idiocy',
                    'url',
                    'url(\\1#\\1)',
                    'url(\\1#\\1)',
                    'url(\\1#\\1)',
                    '\\1:url(\\2#\\3)',
                ],
            ],
        ],
    ];

    if ($block_external_images) {
        // Additionally replace every http(s) image reference by the placeholder.
        $bad_attvals['/.*/']['/^src|background/i'][0][] = '/^([\'\"])\s*https*:.*([\'\"])/si';
        $bad_attvals['/.*/']['/^src|background/i'][1][] = '${1}' . $safe_image_path . '${1}';
        $bad_attvals['/.*/']['/^style/i'][0][]          = '/url\(([\'\"])\s*https*:.*([\'\"])\)/si';
        $bad_attvals['/.*/']['/^style/i'][1][]          = 'url(${1}' . $safe_image_path . '${1})';
    }

    // Every link opens in a new window.
    $add_attr_to_tag = [
        '/^a$/i' => ['target' => '"_blank"'],
    ];

    return tln_sanitize(
        $body,
        $tag_list,
        $rm_tags_with_content,
        $self_closing_tags,
        $force_tag_closing,
        $rm_attnames,
        $bad_attvals,
        $add_attr_to_tag,
        $trans_image_path,
        $block_external_images
    );
}
1133