Completed
Push — namespace2 ( 791eac...5c23fb )
by Fabio
08:41
created

TSafeHtmlParser::_writeAttrs()   D

Complexity

Conditions 23
Paths 2

Size

Total Lines 94
Code Lines 52

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 23
eloc 52
nc 2
nop 1
dl 0
loc 94
rs 4.6303
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive group of statements into its own, well-named method.

1
<?php
2
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
3
4
/**
5
 * SafeHTML Parser
6
 *
7
 * PHP versions 4 and 5
8
 *
9
 * @category   HTML
10
 * @package    System.Security
11
 * @author     Roman Ivanov <[email protected]>
12
 * @copyright  2004-2005 Roman Ivanov
13
 * @license    http://www.debian.org/misc/bsd.license  BSD License (3 Clause)
14
 * @version    1.3.7
15
 * @link       http://pixel-apes.com/safehtml/
16
 */
17
18
19
namespace Prado\Vendor\SafeHtml;
20
21
use Prado\Vendor\SafeHtml\HTMLSax3\TSax3;
22
23
/**
 * TSafeHtmlParser
 *
 * This parser strips down all potentially dangerous content within HTML:
 * <ul>
 * <li>opening tag without its closing tag</li>
 * <li>closing tag without its opening tag</li>
 * <li>any of these tags: "base", "basefont", "head", "html", "body", "applet",
 * "object", "iframe", "frame", "frameset", "script", "layer", "ilayer", "embed",
 * "bgsound", "link", "meta", "style", "title", "blink", "xml" etc.</li>
 * <li>any of these attributes: on*, data*, dynsrc</li>
 * <li>javascript:/vbscript:/about: etc. protocols</li>
 * <li>expression/behavior etc. in styles</li>
 * <li>any other active content</li>
 * </ul>
 * It also tries to convert code to valid XHTML, but htmltidy is a far better
 * solution for this task.
 *
 * The class acts as the handler object of a SAX-style parser: parse() wires
 * _openHandler(), _closeHandler(), _dataHandler() and _escapeHandler() into a
 * TSax3 instance and collects the filtered markup in an internal buffer.
 *
 * <b>Example:</b>
 * <pre>
 * $parser = new Prado\Vendor\SafeHtml\TSafeHtmlParser;
 * $result = $parser->parse($doc);
 * </pre>
 *
 * @category   HTML
 * @package    System.Security
 * @author     Roman Ivanov <[email protected]>
 * @copyright  1997-2005 Roman Ivanov
 * @license    http://www.debian.org/misc/bsd.license  BSD License (3 Clause)
 * @version    Release: @package_version@
 * @link       http://pear.php.net/package/SafeHTML
 */
class TSafeHtmlParser
{
    /**
     * Storage for resulting HTML output
     *
     * @var string
     */
    private $_xhtml = '';

    /**
     * Number of currently open occurrences of each written tag, keyed by tag name
     *
     * @var array
     */
    private $_counter = array();

    /**
     * Stack of unclosed tags
     *
     * @var array
     */
    private $_stack = array();

    /**
     * Counters for open tags that must be deleted together with their content
     *
     * @var array
     */
    private $_dcCounter = array();

    /**
     * Stack of unclosed tags that must be deleted together with their content
     *
     * @var array
     */
    private $_dcStack = array();

    /**
     * Current level of list (ol/ul/...) nesting
     *
     * @var int
     */
    private $_listScope = 0;

    /**
     * Stack of list nesting levels, one entry per unclosed <li>
     *
     * @var array
     */
    private $_liStack = array();

    /**
     * Prepared regular expressions matching the blacklisted protocols
     *
     * @var array
     */
    private $_protoRegexps = array();

    /**
     * Prepared regular expressions matching the dangerous CSS keywords
     *
     * @var array
     */
    private $_cssRegexps = array();

    /**
     * List of single tags ("<tag />")
     *
     * @var array
     */
    public $singleTags = array('area', 'br', 'img', 'input', 'hr', 'wbr', );

    /**
     * List of dangerous tags (such tags will be deleted)
     *
     * @var array
     */
    public $deleteTags = array(
        'applet', 'base',   'basefont', 'bgsound', 'blink',  'body',
        'embed',  'frame',  'frameset', 'head',    'html',   'ilayer',
        'iframe', 'layer',  'link',     'meta',    'object', 'style',
        'title',  'script',
        );

    /**
     * List of dangerous tags (such tags will be deleted, and all content
     * inside these tags will be removed as well)
     *
     * @var array
     */
    public $deleteTagsContent = array('script', 'style', 'title', 'xml', );

    /**
     * Type of protocols filtering ('white' or 'black')
     *
     * @var string
     */
    public $protocolFiltering = 'white';

    /**
     * List of "dangerous" protocols (used for blacklist-filtering).
     *
     * The matching regular expressions are built once in the constructor,
     * so changes made to this list afterwards do not affect filtering.
     *
     * @var array
     */
    public $blackProtocols = array(
        'about',   'chrome',     'data',       'disk',     'hcp',
        'help',    'javascript', 'livescript', 'lynxcgi',  'lynxexec',
        'ms-help', 'ms-its',     'mhtml',      'mocha',    'opera',
        'res',     'resource',   'shell',      'vbscript', 'view-source',
        'vnd.ms.radio',          'wysiwyg',
        );

    /**
     * List of "safe" protocols (used for whitelist-filtering)
     *
     * @var array
     */
    public $whiteProtocols = array(
        'ed2k',   'file', 'ftp',  'gopher', 'http',  'https',
        'irc',    'mailto', 'news', 'nntp', 'telnet', 'webcal',
        'xmpp',   'callto',
        );

    /**
     * List of attributes that can contain protocols
     *
     * @var array
     */
    public $protocolAttributes = array(
        'action', 'background', 'codebase', 'dynsrc', 'href', 'lowsrc', 'src',
        );

    /**
     * List of dangerous CSS keywords
     *
     * The whole style="" attribute is removed if the parser finds one of
     * these keywords in it. The matching regular expressions are built once
     * in the constructor.
     *
     * @var array
     */
    public $cssKeywords = array(
        'absolute', 'behavior',       'behaviour',   'content', 'expression',
        'fixed',    'include-source', 'moz-binding',
        );

    /**
     * List of tags that can have no "closing tag"
     *
     * @var array
     * @deprecated XHTML does not allow such tags
     */
    public $noClose = array();

    /**
     * List of block-level tags that terminate a paragraph
     *
     * An open paragraph is closed when one of these tags is opened
     *
     * @var array
     */
    public $closeParagraph = array(
        'address', 'blockquote', 'center', 'dd',      'dir',       'div',
        'dl',      'dt',         'h1',     'h2',      'h3',        'h4',
        'h5',      'h6',         'hr',     'isindex', 'listing',   'marquee',
        'menu',    'multicol',   'ol',     'p',       'plaintext', 'pre',
        'table',   'ul',         'xmp',
        );

    /**
     * List of table tags, all table tags outside a table will be removed
     *
     * @var array
     */
    public $tableTags = array(
        'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
        'thead',   'tr',
        );

    /**
     * List of list tags
     *
     * @var array
     */
    public $listTags = array('dir', 'menu', 'ol', 'ul', 'dl', );

    /**
     * List of dangerous attributes
     *
     * @var array
     */
    public $attributes = array('dynsrc');

    /**
     * List of allowed "namespaced" attributes
     *
     * @var array
     */
    public $attributesNS = array('xml:lang', );

    /**
     * Prepares the regular expressions derived from $blackProtocols and
     * $cssKeywords.
     */
    public function __construct()
    {
        // Whitespace / control characters tolerated before and between the
        // letters of a protocol name ("java\nscript:" must still match).
        $gap = "[\s\x01-\x1F]*";

        foreach ($this->blackProtocols as $proto) {
            $pattern = '/' . $gap;
            $length = strlen($proto);
            for ($i = 0; $i < $length; $i++) {
                // Quote every character so "." or "/" in a protocol name is
                // matched literally and cannot break the pattern.
                $pattern .= preg_quote($proto[$i], '/') . $gap;
            }
            $this->_protoRegexps[] = $pattern . ':/i';
        }

        foreach ($this->cssKeywords as $keyword) {
            $this->_cssRegexps[] = '/' . preg_quote($keyword, '/') . '/i';
        }
    }

    /**
     * Handles the writing of attributes - called from $this->_openHandler()
     *
     * Every attribute that survives filtering is appended to the output
     * buffer as ` name="value"`; rejected attributes are silently skipped.
     *
     * @param array $attrs array of attributes $name => $value
     * @return boolean always true
     */
    private function _writeAttrs($attrs)
    {
        if (!is_array($attrs)) {
            return true;
        }

        foreach ($attrs as $name => $value) {
            $name = strtolower((string) $name);

            if (!$this->_isAttributeNameAllowed($name)) {
                continue;
            }

            // Valueless attributes are written in XHTML form: name="name".
            if ($value === true || $value === null) {
                $value = $name;
            }

            if ($name === 'style') {
                $value = $this->_sanitizeStyle($value);
                if ($value === null) {
                    continue;
                }
            }

            // The protocol check runs on an entity-decoded copy; the value
            // that is written out keeps its original entities.
            if (in_array($name, $this->protocolAttributes, true)
                && !$this->_isProtocolAllowed($this->_decodeNumericEntities($value))
            ) {
                continue;
            }

            $this->_xhtml .= ' ' . $name . '="' . str_replace('"', '&quot;', $value) . '"';
        }

        return true;
    }

    /**
     * Tells whether an (already lower-cased) attribute name may be written.
     *
     * @param string $name attribute name
     * @return boolean false for on*, data*, names listed in $attributes and
     *                 non-alphanumeric names not listed in $attributesNS
     */
    private function _isAttributeNameAllowed($name)
    {
        if (strpos($name, 'on') === 0 || strpos($name, 'data') === 0) {
            return false;
        }
        if (in_array($name, $this->attributes, true)) {
            return false;
        }
        if (preg_match("/^[a-z0-9]+$/i", $name)) {
            return true;
        }

        return in_array($name, $this->attributesNS, true);
    }

    /**
     * Cleans the value of a style attribute.
     *
     * @param string $value raw style value
     * @return string|null the cleaned value, or null when the whole
     *                     attribute has to be dropped
     */
    private function _sanitizeStyle($value)
    {
        // removes insignificant backslashes
        $value = str_replace("\\", '', $value);

        // removes CSS comments; repeated because removing one comment can
        // join the remaining text into a new one
        do {
            $before = $value;
            $value = preg_replace("!/\*.*?\*/!s", '', $before);
        } while ($value !== null && $value !== $before);

        if ($value === null) {
            // PCRE failure: the value could not be inspected, so drop it.
            return null;
        }

        // replace all & with &amp; without double-encoding existing &amp;
        $value = str_replace('&', '&amp;', str_replace('&amp;', '&', $value));

        foreach ($this->_cssRegexps as $pattern) {
            if (preg_match($pattern, $value)) {
                return null;
            }
        }
        foreach ($this->_protoRegexps as $pattern) {
            if (preg_match($pattern, $value)) {
                return null;
            }
        }

        return $value;
    }

    /**
     * Decodes decimal (&#106;) and hexadecimal (&#x6a;) character references
     * into single bytes so that an obfuscated protocol name becomes visible.
     *
     * @param string $value attribute value
     * @return string value with numeric character references replaced
     */
    private function _decodeNumericEntities($value)
    {
        $decoded = preg_replace_callback(
            '/&#(\d+);?/m',
            function ($matches) {
                // Group 1 holds the digits; chr() keeps the low byte only.
                return chr((int) $matches[1]);
            },
            $value
        );

        return (string) preg_replace_callback(
            '/&#x([0-9a-f]+);?/mi',
            function ($matches) {
                // The last two hex digits are the low byte, which is all
                // chr() uses; this also keeps very long digit runs in range.
                return chr(hexdec(substr($matches[1], -2)));
            },
            (string) $decoded
        );
    }

    /**
     * Applies the configured protocol filtering to an entity-decoded value.
     *
     * @param string $decoded attribute value after _decodeNumericEntities()
     * @return boolean true when the value may be written
     */
    private function _isProtocolAllowed($decoded)
    {
        // Values without a colon carry no protocol part.
        if (strpos($decoded, ':') === false) {
            return true;
        }

        if ($this->protocolFiltering === 'black') {
            foreach ($this->_protoRegexps as $pattern) {
                if (preg_match($pattern, $decoded)) {
                    return false;
                }
            }
            return true;
        }

        // Whitelist mode: everything before the first colon must be a
        // known protocol. Compared in lower case, so "HTTP:" equals "http:".
        $parts = explode(':', $decoded);

        return in_array(strtolower($parts[0]), $this->whiteProtocols, true);
    }

    /**
     * Opening tag handler - called from HTMLSax
     *
     * @param object $parser HTML Parser
     * @param string $name   tag name
     * @param array  $attrs  tag attributes
     * @return boolean always true
     */
    public function _openHandler(&$parser, $name, $attrs)
    {
        $name = strtolower($name);

        if (in_array($name, $this->deleteTagsContent, true)) {
            array_push($this->_dcStack, $name);
            $this->_dcCounter[$name] = isset($this->_dcCounter[$name]) ? $this->_dcCounter[$name] + 1 : 1;
        }
        // Inside a tag whose content is deleted nothing is written at all.
        if (count($this->_dcStack) != 0) {
            return true;
        }

        if (in_array($name, $this->deleteTags, true)) {
            return true;
        }

        if (!preg_match("/^[a-z0-9]+$/i", $name)) {
            // Not a real tag name; keep things looking like <user@host> or
            // <scheme://...> as escaped text.
            if (preg_match("!(?:\@|://)!i", $name)) {
                $this->_xhtml .= '&lt;' . $name . '&gt;';
            }
            return true;
        }

        if (in_array($name, $this->singleTags, true)) {
            $this->_xhtml .= '<' . $name;
            $this->_writeAttrs($attrs);
            $this->_xhtml .= ' />';
            return true;
        }

        // TABLES: cannot open table elements when we are not inside a table.
        // empty() also covers the case where no table has been opened yet.
        if (empty($this->_counter['table']) && in_array($name, $this->tableTags, true)) {
            return true;
        }

        // PARAGRAPHS: close paragraph when a closeParagraph tag is opening
        if (in_array($name, $this->closeParagraph, true) && in_array('p', $this->_stack, true)) {
            $this->_closeHandler($parser, 'p');
        }

        // LISTS: we should close <li> if an <li> of the same level is opening
        if ($name === 'li' && count($this->_liStack)
            && $this->_listScope == $this->_liStack[count($this->_liStack) - 1]
        ) {
            $this->_closeHandler($parser, 'li');
        }

        // LISTS: we want to know on what nesting level of lists we are
        if (in_array($name, $this->listTags, true)) {
            $this->_listScope++;
        }
        if ($name === 'li') {
            array_push($this->_liStack, $this->_listScope);
        }

        $this->_xhtml .= '<' . $name;
        $this->_writeAttrs($attrs);
        $this->_xhtml .= '>';
        array_push($this->_stack, $name);
        $this->_counter[$name] = isset($this->_counter[$name]) ? $this->_counter[$name] + 1 : 1;

        return true;
    }

    /**
     * Closing tag handler - called from HTMLSax
     *
     * @param object $parser HTML parser
     * @param string $name   tag name
     * @return boolean always true
     */
    public function _closeHandler(&$parser, $name)
    {
        $name = strtolower($name);

        if (isset($this->_dcCounter[$name]) && $this->_dcCounter[$name] > 0
            && in_array($name, $this->deleteTagsContent, true)
        ) {
            // Unwind the deleted-content stack up to and including $name.
            // The null check ends the loop if the stack runs empty first.
            while (null !== ($tag = array_pop($this->_dcStack))) {
                $this->_dcCounter[$tag]--;
                if ($tag === $name) {
                    break;
                }
            }
        }

        if (count($this->_dcStack) != 0) {
            return true;
        }

        if (isset($this->_counter[$name]) && $this->_counter[$name] > 0) {
            // Close every tag opened after $name, then $name itself.
            while (null !== ($tag = array_pop($this->_stack))) {
                $this->_closeTag($tag);
                if ($tag === $name) {
                    break;
                }
            }
        }

        return true;
    }

    /**
     * Closes tag: writes the closing markup and updates the bookkeeping
     *
     * @param string $tag tag name
     * @return boolean always true
     */
    public function _closeTag($tag)
    {
        if (!in_array($tag, $this->noClose, true)) {
            $this->_xhtml .= '</' . $tag . '>';
        }

        if (isset($this->_counter[$tag])) {
            $this->_counter[$tag]--;
        }

        if (in_array($tag, $this->listTags, true)) {
            $this->_listScope--;
        }

        if ($tag === 'li') {
            array_pop($this->_liStack);
        }

        return true;
    }

    /**
     * Character data handler - called from HTMLSax
     *
     * Text is copied to the output unless it sits inside a tag whose
     * content is being deleted.
     *
     * @param object $parser HTML parser
     * @param string $data   textual data
     * @return boolean always true
     */
    public function _dataHandler(&$parser, $data)
    {
        if (count($this->_dcStack) == 0) {
            $this->_xhtml .= $data;
        }

        return true;
    }

    /**
     * Escape handler - called from HTMLSax
     *
     * Comments and other escaped data are discarded.
     *
     * @param object $parser HTML parser
     * @param string $data   comments or other type of data
     * @return boolean always true
     */
    public function _escapeHandler(&$parser, $data)
    {
        return true;
    }

    /**
     * Returns the XHTML document, closing every tag that is still open
     *
     * @return string Processed (X)HTML document
     */
    public function getXHTML()
    {
        // Compare against null so that a tag named "0" does not end the loop.
        while (null !== ($tag = array_pop($this->_stack))) {
            $this->_closeTag($tag);
        }

        return $this->_xhtml;
    }

    /**
     * Clears current document data
     *
     * Resets the output buffer and all per-document bookkeeping, so that
     * leftovers of a previous document (for example an unclosed <script>)
     * cannot affect the next one.
     *
     * @return boolean always true
     */
    public function clear()
    {
        $this->_xhtml = '';
        $this->_counter = array();
        $this->_stack = array();
        $this->_dcCounter = array();
        $this->_dcStack = array();
        $this->_listScope = 0;
        $this->_liStack = array();

        return true;
    }

    /**
     * Main parsing function
     *
     * @param string  $doc    HTML document for processing
     * @param boolean $isUTF7 when true, the document is first passed through
     *                        repackUTF7()
     * @return string Processed (X)HTML document
     */
    public function parse($doc, $isUTF7 = false)
    {
        $this->clear();

        // Save all '<' symbols
        $doc = preg_replace("/<(?=[^a-zA-Z\/\!\?\%])/", '&lt;', (string) $doc);

        // Web documents shouldn't contain the \x00 symbol
        $doc = str_replace("\x00", '', $doc);

        // Opera6 bug workaround
        $doc = str_replace("\xC0\xBC", '&lt;', $doc);

        // UTF-7 encoding ASCII decode
        if ($isUTF7) {
            $doc = $this->repackUTF7($doc);
        }

        // Instantiate the parser and register this object's handlers
        $parser = new TSax3();
        $parser->set_object($this);
        $parser->set_element_handler('_openHandler', '_closeHandler');
        $parser->set_data_handler('_dataHandler');
        $parser->set_escape_handler('_escapeHandler');

        $parser->parse($doc);

        return $this->getXHTML();
    }

    /**
     * UTF-7 decoding function
     *
     * Rewrites every "+<base64>-" sequence of the document through
     * repackUTF7Callback().
     *
     * @param string $str HTML document for recode ASCII part of UTF-7 back to ASCII
     * @return string Decoded document
     */
    private function repackUTF7($str)
    {
        return preg_replace_callback('!\+([0-9a-zA-Z/]+)\-!', array($this, 'repackUTF7Callback'), $str);
    }

    /**
     * Additional UTF-7 decoding function (preg_replace_callback callback)
     *
     * Base64-decodes the captured payload, re-encodes through
     * repackUTF7Back() the part made of byte pairs whose first byte is not
     * NUL, and drops the NUL byte in front of the remaining characters.
     *
     * @param array $str match array; index 1 is the base64 payload
     * @return string Recoded string
     */
    private function repackUTF7Callback($str)
    {
        $str = base64_decode($str[1]);
        $str = preg_replace_callback('/^((?:\x00.)*)((?:[^\x00].)+)/', array($this, 'repackUTF7Back'), $str);

        return preg_replace('/\x00(.)/', '$1', $str);
    }

    /**
     * Additional UTF-7 encoding function (preg_replace_callback callback)
     *
     * @param array $str match array; index 1 is kept as is, index 2 is
     *                   base64-encoded again and wrapped in "+...-"
     * @return string Recoded string
     */
    private function repackUTF7Back($str)
    {
        return $str[1] . '+' . rtrim(base64_encode($str[2]), '=') . '-';
    }
}
675
676
/*
677
 * Local variables:
678
 * tab-width: 4
679
 * c-basic-offset: 4
680
 * c-hanging-comment-ender-p: nil
681
 * End:
682
 */
683
684