GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( b119a5...2262ef )
by gyeong-won
19:32 queued 13:26
created
classes/security/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php 4 patches
Doc Comments   +3 added lines patch added patch discarded remove patch
@@ -9,6 +9,9 @@
 block discarded – undo
9 9
 class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
10 10
 {
11 11
 
12
+    /**
13
+     * @param string $aIP
14
+     */
12 15
     public function validate($aIP, $config, $context) {
13 16
 
14 17
         if (!$this->ip4) $this->_loadRegex();
Please login to merge, or discard this patch.
Indentation   +84 added lines, -84 removed lines patch added patch discarded remove patch
@@ -9,90 +9,90 @@
 block discarded – undo
9 9
 class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
10 10
 {
11 11
 
12
-    public function validate($aIP, $config, $context) {
13
-
14
-        if (!$this->ip4) $this->_loadRegex();
15
-
16
-        $original = $aIP;
17
-
18
-        $hex = '[0-9a-fA-F]';
19
-        $blk = '(?:' . $hex . '{1,4})';
20
-        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';   // /0 - /128
21
-
22
-        //      prefix check
23
-        if (strpos($aIP, '/') !== false)
24
-        {
25
-                if (preg_match('#' . $pre . '$#s', $aIP, $find))
26
-                {
27
-                        $aIP = substr($aIP, 0, 0-strlen($find[0]));
28
-                        unset($find);
29
-                }
30
-                else
31
-                {
32
-                        return false;
33
-                }
34
-        }
35
-
36
-        //      IPv4-compatiblity check
37
-        if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
38
-        {
39
-                $aIP = substr($aIP, 0, 0-strlen($find[0]));
40
-                $ip = explode('.', $find[0]);
41
-                $ip = array_map('dechex', $ip);
42
-                $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
43
-                unset($find, $ip);
44
-        }
45
-
46
-        //      compression check
47
-        $aIP = explode('::', $aIP);
48
-        $c = count($aIP);
49
-        if ($c > 2)
50
-        {
51
-                return false;
52
-        }
53
-        elseif ($c == 2)
54
-        {
55
-                list($first, $second) = $aIP;
56
-                $first = explode(':', $first);
57
-                $second = explode(':', $second);
58
-
59
-                if (count($first) + count($second) > 8)
60
-                {
61
-                        return false;
62
-                }
63
-
64
-                while(count($first) < 8)
65
-                {
66
-                        array_push($first, '0');
67
-                }
68
-
69
-                array_splice($first, 8 - count($second), 8, $second);
70
-                $aIP = $first;
71
-                unset($first,$second);
72
-        }
73
-        else
74
-        {
75
-                $aIP = explode(':', $aIP[0]);
76
-        }
77
-        $c = count($aIP);
78
-
79
-        if ($c != 8)
80
-        {
81
-                return false;
82
-        }
83
-
84
-        //      All the pieces should be 16-bit hex strings. Are they?
85
-        foreach ($aIP as $piece)
86
-        {
87
-                if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
88
-                {
89
-                        return false;
90
-                }
91
-        }
92
-
93
-        return $original;
94
-
95
-    }
12
+	public function validate($aIP, $config, $context) {
13
+
14
+		if (!$this->ip4) $this->_loadRegex();
15
+
16
+		$original = $aIP;
17
+
18
+		$hex = '[0-9a-fA-F]';
19
+		$blk = '(?:' . $hex . '{1,4})';
20
+		$pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';   // /0 - /128
21
+
22
+		//      prefix check
23
+		if (strpos($aIP, '/') !== false)
24
+		{
25
+				if (preg_match('#' . $pre . '$#s', $aIP, $find))
26
+				{
27
+						$aIP = substr($aIP, 0, 0-strlen($find[0]));
28
+						unset($find);
29
+				}
30
+				else
31
+				{
32
+						return false;
33
+				}
34
+		}
35
+
36
+		//      IPv4-compatiblity check
37
+		if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
38
+		{
39
+				$aIP = substr($aIP, 0, 0-strlen($find[0]));
40
+				$ip = explode('.', $find[0]);
41
+				$ip = array_map('dechex', $ip);
42
+				$aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
43
+				unset($find, $ip);
44
+		}
45
+
46
+		//      compression check
47
+		$aIP = explode('::', $aIP);
48
+		$c = count($aIP);
49
+		if ($c > 2)
50
+		{
51
+				return false;
52
+		}
53
+		elseif ($c == 2)
54
+		{
55
+				list($first, $second) = $aIP;
56
+				$first = explode(':', $first);
57
+				$second = explode(':', $second);
58
+
59
+				if (count($first) + count($second) > 8)
60
+				{
61
+						return false;
62
+				}
63
+
64
+				while(count($first) < 8)
65
+				{
66
+						array_push($first, '0');
67
+				}
68
+
69
+				array_splice($first, 8 - count($second), 8, $second);
70
+				$aIP = $first;
71
+				unset($first,$second);
72
+		}
73
+		else
74
+		{
75
+				$aIP = explode(':', $aIP[0]);
76
+		}
77
+		$c = count($aIP);
78
+
79
+		if ($c != 8)
80
+		{
81
+				return false;
82
+		}
83
+
84
+		//      All the pieces should be 16-bit hex strings. Are they?
85
+		foreach ($aIP as $piece)
86
+		{
87
+				if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
88
+				{
89
+						return false;
90
+				}
91
+		}
92
+
93
+		return $original;
94
+
95
+	}
96 96
 
97 97
 }
98 98
 
Please login to merge, or discard this patch.
Braces   +6 added lines, -7 removed lines patch added patch discarded remove patch
@@ -11,7 +11,9 @@  discard block
 block discarded – undo
11 11
 
12 12
     public function validate($aIP, $config, $context) {
13 13
 
14
-        if (!$this->ip4) $this->_loadRegex();
14
+        if (!$this->ip4) {
15
+        	$this->_loadRegex();
16
+        }
15 17
 
16 18
         $original = $aIP;
17 19
 
@@ -26,8 +28,7 @@  discard block
 block discarded – undo
26 28
                 {
27 29
                         $aIP = substr($aIP, 0, 0-strlen($find[0]));
28 30
                         unset($find);
29
-                }
30
-                else
31
+                } else
31 32
                 {
32 33
                         return false;
33 34
                 }
@@ -49,8 +50,7 @@  discard block
 block discarded – undo
49 50
         if ($c > 2)
50 51
         {
51 52
                 return false;
52
-        }
53
-        elseif ($c == 2)
53
+        } elseif ($c == 2)
54 54
         {
55 55
                 list($first, $second) = $aIP;
56 56
                 $first = explode(':', $first);
@@ -69,8 +69,7 @@  discard block
 block discarded – undo
69 69
                 array_splice($first, 8 - count($second), 8, $second);
70 70
                 $aIP = $first;
71 71
                 unset($first,$second);
72
-        }
73
-        else
72
+        } else
74 73
         {
75 74
                 $aIP = explode(':', $aIP[0]);
76 75
         }
Please login to merge, or discard this patch.
Spacing   +9 added lines, -9 removed lines patch added patch discarded remove patch
@@ -16,15 +16,15 @@  discard block
 block discarded – undo
16 16
         $original = $aIP;
17 17
 
18 18
         $hex = '[0-9a-fA-F]';
19
-        $blk = '(?:' . $hex . '{1,4})';
20
-        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';   // /0 - /128
19
+        $blk = '(?:'.$hex.'{1,4})';
20
+        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128
21 21
 
22 22
         //      prefix check
23 23
         if (strpos($aIP, '/') !== false)
24 24
         {
25
-                if (preg_match('#' . $pre . '$#s', $aIP, $find))
25
+                if (preg_match('#'.$pre.'$#s', $aIP, $find))
26 26
                 {
27
-                        $aIP = substr($aIP, 0, 0-strlen($find[0]));
27
+                        $aIP = substr($aIP, 0, 0 - strlen($find[0]));
28 28
                         unset($find);
29 29
                 }
30 30
                 else
@@ -34,12 +34,12 @@  discard block
 block discarded – undo
34 34
         }
35 35
 
36 36
         //      IPv4-compatiblity check
37
-        if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
37
+        if (preg_match('#(?<=:'.')'.$this->ip4.'$#s', $aIP, $find))
38 38
         {
39
-                $aIP = substr($aIP, 0, 0-strlen($find[0]));
39
+                $aIP = substr($aIP, 0, 0 - strlen($find[0]));
40 40
                 $ip = explode('.', $find[0]);
41 41
                 $ip = array_map('dechex', $ip);
42
-                $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
42
+                $aIP .= $ip[0].$ip[1].':'.$ip[2].$ip[3];
43 43
                 unset($find, $ip);
44 44
         }
45 45
 
@@ -61,14 +61,14 @@  discard block
 block discarded – undo
61 61
                         return false;
62 62
                 }
63 63
 
64
-                while(count($first) < 8)
64
+                while (count($first) < 8)
65 65
                 {
66 66
                         array_push($first, '0');
67 67
                 }
68 68
 
69 69
                 array_splice($first, 8 - count($second), 8, $second);
70 70
                 $aIP = $first;
71
-                unset($first,$second);
71
+                unset($first, $second);
72 72
         }
73 73
         else
74 74
         {
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/DoctypeRegistry.php 4 patches
Doc Comments   +3 added lines, -1 removed lines patch added patch discarded remove patch
@@ -17,10 +17,12 @@
 block discarded – undo
17 17
      * Registers a doctype to the registry
18 18
      * @note Accepts a fully-formed doctype object, or the
19 19
      *       parameters for constructing a doctype object
20
-     * @param $doctype Name of doctype or literal doctype object
20
+     * @param string $doctype Name of doctype or literal doctype object
21 21
      * @param $modules Modules doctype will load
22 22
      * @param $modules_for_modes Modules doctype will load for certain modes
23 23
      * @param $aliases Alias names for doctype
24
+     * @param string $dtd_public
25
+     * @param string $dtd_system
24 26
      * @return Editable registered doctype
25 27
      */
26 28
     public function register($doctype, $xml = true, $modules = array(),
Please login to merge, or discard this patch.
Indentation   +89 added lines, -89 removed lines patch added patch discarded remove patch
@@ -3,100 +3,100 @@
 block discarded – undo
3 3
 class HTMLPurifier_DoctypeRegistry
4 4
 {
5 5
 
6
-    /**
7
-     * Hash of doctype names to doctype objects
8
-     */
9
-    protected $doctypes;
6
+	/**
7
+	 * Hash of doctype names to doctype objects
8
+	 */
9
+	protected $doctypes;
10 10
 
11
-    /**
12
-     * Lookup table of aliases to real doctype names
13
-     */
14
-    protected $aliases;
11
+	/**
12
+	 * Lookup table of aliases to real doctype names
13
+	 */
14
+	protected $aliases;
15 15
 
16
-    /**
17
-     * Registers a doctype to the registry
18
-     * @note Accepts a fully-formed doctype object, or the
19
-     *       parameters for constructing a doctype object
20
-     * @param $doctype Name of doctype or literal doctype object
21
-     * @param $modules Modules doctype will load
22
-     * @param $modules_for_modes Modules doctype will load for certain modes
23
-     * @param $aliases Alias names for doctype
24
-     * @return Editable registered doctype
25
-     */
26
-    public function register($doctype, $xml = true, $modules = array(),
27
-        $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
28
-    ) {
29
-        if (!is_array($modules)) $modules = array($modules);
30
-        if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules);
31
-        if (!is_array($aliases)) $aliases = array($aliases);
32
-        if (!is_object($doctype)) {
33
-            $doctype = new HTMLPurifier_Doctype(
34
-                $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
35
-            );
36
-        }
37
-        $this->doctypes[$doctype->name] = $doctype;
38
-        $name = $doctype->name;
39
-        // hookup aliases
40
-        foreach ($doctype->aliases as $alias) {
41
-            if (isset($this->doctypes[$alias])) continue;
42
-            $this->aliases[$alias] = $name;
43
-        }
44
-        // remove old aliases
45
-        if (isset($this->aliases[$name])) unset($this->aliases[$name]);
46
-        return $doctype;
47
-    }
16
+	/**
17
+	 * Registers a doctype to the registry
18
+	 * @note Accepts a fully-formed doctype object, or the
19
+	 *       parameters for constructing a doctype object
20
+	 * @param $doctype Name of doctype or literal doctype object
21
+	 * @param $modules Modules doctype will load
22
+	 * @param $modules_for_modes Modules doctype will load for certain modes
23
+	 * @param $aliases Alias names for doctype
24
+	 * @return Editable registered doctype
25
+	 */
26
+	public function register($doctype, $xml = true, $modules = array(),
27
+		$tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
28
+	) {
29
+		if (!is_array($modules)) $modules = array($modules);
30
+		if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules);
31
+		if (!is_array($aliases)) $aliases = array($aliases);
32
+		if (!is_object($doctype)) {
33
+			$doctype = new HTMLPurifier_Doctype(
34
+				$doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
35
+			);
36
+		}
37
+		$this->doctypes[$doctype->name] = $doctype;
38
+		$name = $doctype->name;
39
+		// hookup aliases
40
+		foreach ($doctype->aliases as $alias) {
41
+			if (isset($this->doctypes[$alias])) continue;
42
+			$this->aliases[$alias] = $name;
43
+		}
44
+		// remove old aliases
45
+		if (isset($this->aliases[$name])) unset($this->aliases[$name]);
46
+		return $doctype;
47
+	}
48 48
 
49
-    /**
50
-     * Retrieves reference to a doctype of a certain name
51
-     * @note This function resolves aliases
52
-     * @note When possible, use the more fully-featured make()
53
-     * @param $doctype Name of doctype
54
-     * @return Editable doctype object
55
-     */
56
-    public function get($doctype) {
57
-        if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
58
-        if (!isset($this->doctypes[$doctype])) {
59
-            trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR);
60
-            $anon = new HTMLPurifier_Doctype($doctype);
61
-            return $anon;
62
-        }
63
-        return $this->doctypes[$doctype];
64
-    }
49
+	/**
50
+	 * Retrieves reference to a doctype of a certain name
51
+	 * @note This function resolves aliases
52
+	 * @note When possible, use the more fully-featured make()
53
+	 * @param $doctype Name of doctype
54
+	 * @return Editable doctype object
55
+	 */
56
+	public function get($doctype) {
57
+		if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
58
+		if (!isset($this->doctypes[$doctype])) {
59
+			trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR);
60
+			$anon = new HTMLPurifier_Doctype($doctype);
61
+			return $anon;
62
+		}
63
+		return $this->doctypes[$doctype];
64
+	}
65 65
 
66
-    /**
67
-     * Creates a doctype based on a configuration object,
68
-     * will perform initialization on the doctype
69
-     * @note Use this function to get a copy of doctype that config
70
-     *       can hold on to (this is necessary in order to tell
71
-     *       Generator whether or not the current document is XML
72
-     *       based or not).
73
-     */
74
-    public function make($config) {
75
-        return clone $this->get($this->getDoctypeFromConfig($config));
76
-    }
66
+	/**
67
+	 * Creates a doctype based on a configuration object,
68
+	 * will perform initialization on the doctype
69
+	 * @note Use this function to get a copy of doctype that config
70
+	 *       can hold on to (this is necessary in order to tell
71
+	 *       Generator whether or not the current document is XML
72
+	 *       based or not).
73
+	 */
74
+	public function make($config) {
75
+		return clone $this->get($this->getDoctypeFromConfig($config));
76
+	}
77 77
 
78
-    /**
79
-     * Retrieves the doctype from the configuration object
80
-     */
81
-    public function getDoctypeFromConfig($config) {
82
-        // recommended test
83
-        $doctype = $config->get('HTML.Doctype');
84
-        if (!empty($doctype)) return $doctype;
85
-        $doctype = $config->get('HTML.CustomDoctype');
86
-        if (!empty($doctype)) return $doctype;
87
-        // backwards-compatibility
88
-        if ($config->get('HTML.XHTML')) {
89
-            $doctype = 'XHTML 1.0';
90
-        } else {
91
-            $doctype = 'HTML 4.01';
92
-        }
93
-        if ($config->get('HTML.Strict')) {
94
-            $doctype .= ' Strict';
95
-        } else {
96
-            $doctype .= ' Transitional';
97
-        }
98
-        return $doctype;
99
-    }
78
+	/**
79
+	 * Retrieves the doctype from the configuration object
80
+	 */
81
+	public function getDoctypeFromConfig($config) {
82
+		// recommended test
83
+		$doctype = $config->get('HTML.Doctype');
84
+		if (!empty($doctype)) return $doctype;
85
+		$doctype = $config->get('HTML.CustomDoctype');
86
+		if (!empty($doctype)) return $doctype;
87
+		// backwards-compatibility
88
+		if ($config->get('HTML.XHTML')) {
89
+			$doctype = 'XHTML 1.0';
90
+		} else {
91
+			$doctype = 'HTML 4.01';
92
+		}
93
+		if ($config->get('HTML.Strict')) {
94
+			$doctype .= ' Strict';
95
+		} else {
96
+			$doctype .= ' Transitional';
97
+		}
98
+		return $doctype;
99
+	}
100 100
 
101 101
 }
102 102
 
Please login to merge, or discard this patch.
Braces   +24 added lines, -8 removed lines patch added patch discarded remove patch
@@ -26,9 +26,15 @@  discard block
 block discarded – undo
26 26
     public function register($doctype, $xml = true, $modules = array(),
27 27
         $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
28 28
     ) {
29
-        if (!is_array($modules)) $modules = array($modules);
30
-        if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules);
31
-        if (!is_array($aliases)) $aliases = array($aliases);
29
+        if (!is_array($modules)) {
30
+        	$modules = array($modules);
31
+        }
32
+        if (!is_array($tidy_modules)) {
33
+        	$tidy_modules = array($tidy_modules);
34
+        }
35
+        if (!is_array($aliases)) {
36
+        	$aliases = array($aliases);
37
+        }
32 38
         if (!is_object($doctype)) {
33 39
             $doctype = new HTMLPurifier_Doctype(
34 40
                 $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
@@ -38,11 +44,15 @@  discard block
 block discarded – undo
38 44
         $name = $doctype->name;
39 45
         // hookup aliases
40 46
         foreach ($doctype->aliases as $alias) {
41
-            if (isset($this->doctypes[$alias])) continue;
47
+            if (isset($this->doctypes[$alias])) {
48
+            	continue;
49
+            }
42 50
             $this->aliases[$alias] = $name;
43 51
         }
44 52
         // remove old aliases
45
-        if (isset($this->aliases[$name])) unset($this->aliases[$name]);
53
+        if (isset($this->aliases[$name])) {
54
+        	unset($this->aliases[$name]);
55
+        }
46 56
         return $doctype;
47 57
     }
48 58
 
@@ -54,7 +64,9 @@  discard block
 block discarded – undo
54 64
      * @return Editable doctype object
55 65
      */
56 66
     public function get($doctype) {
57
-        if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
67
+        if (isset($this->aliases[$doctype])) {
68
+        	$doctype = $this->aliases[$doctype];
69
+        }
58 70
         if (!isset($this->doctypes[$doctype])) {
59 71
             trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR);
60 72
             $anon = new HTMLPurifier_Doctype($doctype);
@@ -81,9 +93,13 @@  discard block
 block discarded – undo
81 93
     public function getDoctypeFromConfig($config) {
82 94
         // recommended test
83 95
         $doctype = $config->get('HTML.Doctype');
84
-        if (!empty($doctype)) return $doctype;
96
+        if (!empty($doctype)) {
97
+        	return $doctype;
98
+        }
85 99
         $doctype = $config->get('HTML.CustomDoctype');
86
-        if (!empty($doctype)) return $doctype;
100
+        if (!empty($doctype)) {
101
+        	return $doctype;
102
+        }
87 103
         // backwards-compatibility
88 104
         if ($config->get('HTML.XHTML')) {
89 105
             $doctype = 'XHTML 1.0';
Please login to merge, or discard this patch.
Spacing   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -56,7 +56,7 @@
 block discarded – undo
56 56
     public function get($doctype) {
57 57
         if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
58 58
         if (!isset($this->doctypes[$doctype])) {
59
-            trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR);
59
+            trigger_error('Doctype '.htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false).' does not exist', E_USER_ERROR);
60 60
             $anon = new HTMLPurifier_Doctype($doctype);
61 61
             return $anon;
62 62
         }
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/Encoder.php 4 patches
Doc Comments   +5 added lines patch added patch discarded remove patch
@@ -31,6 +31,9 @@  discard block
 block discarded – undo
31 31
 
32 32
     /**
33 33
      * iconv wrapper which mutes errors and works around bugs.
34
+     * @param string $in
35
+     * @param string $out
36
+     * @param string $text
34 37
      */
35 38
     public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
36 39
         $code = self::testIconvTruncateBug();
@@ -332,6 +335,7 @@  discard block
 block discarded – undo
332 335
 
333 336
     /**
334 337
      * Converts a string to UTF-8 based on configuration.
338
+     * @param HTMLPurifier_Context $context
335 339
      */
336 340
     public static function convertToUTF8($str, $config, $context) {
337 341
         $encoding = $config->get('Core.Encoding');
@@ -362,6 +366,7 @@  discard block
 block discarded – undo
362 366
      * Converts a string from UTF-8 based on configuration.
363 367
      * @note Currently, this is a lossy conversion, with unexpressable
364 368
      *       characters being omitted.
369
+     * @param HTMLPurifier_Context $context
365 370
      */
366 371
     public static function convertFromUTF8($str, $config, $context) {
367 372
         $encoding = $config->get('Core.Encoding');
Please login to merge, or discard this patch.
Indentation   +526 added lines, -526 removed lines patch added patch discarded remove patch
@@ -7,532 +7,532 @@
 block discarded – undo
7 7
 class HTMLPurifier_Encoder
8 8
 {
9 9
 
10
-    /**
11
-     * Constructor throws fatal error if you attempt to instantiate class
12
-     */
13
-    private function __construct() {
14
-        trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
15
-    }
16
-
17
-    /**
18
-     * Error-handler that mutes errors, alternative to shut-up operator.
19
-     */
20
-    public static function muteErrorHandler() {}
21
-
22
-    /**
23
-     * iconv wrapper which mutes errors, but doesn't work around bugs.
24
-     */
25
-    public static function unsafeIconv($in, $out, $text) {
26
-        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
27
-        $r = iconv($in, $out, $text);
28
-        restore_error_handler();
29
-        return $r;
30
-    }
31
-
32
-    /**
33
-     * iconv wrapper which mutes errors and works around bugs.
34
-     */
35
-    public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
36
-        $code = self::testIconvTruncateBug();
37
-        if ($code == self::ICONV_OK) {
38
-            return self::unsafeIconv($in, $out, $text);
39
-        } elseif ($code == self::ICONV_TRUNCATES) {
40
-            // we can only work around this if the input character set
41
-            // is utf-8
42
-            if ($in == 'utf-8') {
43
-                if ($max_chunk_size < 4) {
44
-                    trigger_error('max_chunk_size is too small', E_USER_WARNING);
45
-                    return false;
46
-                }
47
-                // split into 8000 byte chunks, but be careful to handle
48
-                // multibyte boundaries properly
49
-                if (($c = strlen($text)) <= $max_chunk_size) {
50
-                    return self::unsafeIconv($in, $out, $text);
51
-                }
52
-                $r = '';
53
-                $i = 0;
54
-                while (true) {
55
-                    if ($i + $max_chunk_size >= $c) {
56
-                        $r .= self::unsafeIconv($in, $out, substr($text, $i));
57
-                        break;
58
-                    }
59
-                    // wibble the boundary
60
-                    if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
61
-                        $chunk_size = $max_chunk_size;
62
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
63
-                        $chunk_size = $max_chunk_size - 1;
64
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
65
-                        $chunk_size = $max_chunk_size - 2;
66
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
67
-                        $chunk_size = $max_chunk_size - 3;
68
-                    } else {
69
-                        return false; // rather confusing UTF-8...
70
-                    }
71
-                    $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
72
-                    $r .= self::unsafeIconv($in, $out, $chunk);
73
-                    $i += $chunk_size;
74
-                }
75
-                return $r;
76
-            } else {
77
-                return false;
78
-            }
79
-        } else {
80
-            return false;
81
-        }
82
-    }
83
-
84
-    /**
85
-     * Cleans a UTF-8 string for well-formedness and SGML validity
86
-     *
87
-     * It will parse according to UTF-8 and return a valid UTF8 string, with
88
-     * non-SGML codepoints excluded.
89
-     *
90
-     * @note Just for reference, the non-SGML code points are 0 to 31 and
91
-     *       127 to 159, inclusive.  However, we allow code points 9, 10
92
-     *       and 13, which are the tab, line feed and carriage return
93
-     *       respectively. 128 and above the code points map to multibyte
94
-     *       UTF-8 representations.
95
-     *
96
-     * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
97
-     *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
98
-     *       LGPL license.  Notes on what changed are inside, but in general,
99
-     *       the original code transformed UTF-8 text into an array of integer
100
-     *       Unicode codepoints. Understandably, transforming that back to
101
-     *       a string would be somewhat expensive, so the function was modded to
102
-     *       directly operate on the string.  However, this discourages code
103
-     *       reuse, and the logic enumerated here would be useful for any
104
-     *       function that needs to be able to understand UTF-8 characters.
105
-     *       As of right now, only smart lossless character encoding converters
106
-     *       would need that, and I'm probably not going to implement them.
107
-     *       Once again, PHP 6 should solve all our problems.
108
-     */
109
-    public static function cleanUTF8($str, $force_php = false) {
110
-
111
-        // UTF-8 validity is checked since PHP 4.3.5
112
-        // This is an optimization: if the string is already valid UTF-8, no
113
-        // need to do PHP stuff. 99% of the time, this will be the case.
114
-        // The regexp matches the XML char production, as well as well as excluding
115
-        // non-SGML codepoints U+007F to U+009F
116
-        if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
117
-            return $str;
118
-        }
119
-
120
-        $mState = 0; // cached expected number of octets after the current octet
121
-                     // until the beginning of the next UTF8 character sequence
122
-        $mUcs4  = 0; // cached Unicode character
123
-        $mBytes = 1; // cached expected number of octets in the current sequence
124
-
125
-        // original code involved an $out that was an array of Unicode
126
-        // codepoints.  Instead of having to convert back into UTF-8, we've
127
-        // decided to directly append valid UTF-8 characters onto a string
128
-        // $out once they're done.  $char accumulates raw bytes, while $mUcs4
129
-        // turns into the Unicode code point, so there's some redundancy.
130
-
131
-        $out = '';
132
-        $char = '';
133
-
134
-        $len = strlen($str);
135
-        for($i = 0; $i < $len; $i++) {
136
-            $in = ord($str{$i});
137
-            $char .= $str[$i]; // append byte to char
138
-            if (0 == $mState) {
139
-                // When mState is zero we expect either a US-ASCII character
140
-                // or a multi-octet sequence.
141
-                if (0 == (0x80 & ($in))) {
142
-                    // US-ASCII, pass straight through.
143
-                    if (($in <= 31 || $in == 127) &&
144
-                        !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
145
-                    ) {
146
-                        // control characters, remove
147
-                    } else {
148
-                        $out .= $char;
149
-                    }
150
-                    // reset
151
-                    $char = '';
152
-                    $mBytes = 1;
153
-                } elseif (0xC0 == (0xE0 & ($in))) {
154
-                    // First octet of 2 octet sequence
155
-                    $mUcs4 = ($in);
156
-                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
157
-                    $mState = 1;
158
-                    $mBytes = 2;
159
-                } elseif (0xE0 == (0xF0 & ($in))) {
160
-                    // First octet of 3 octet sequence
161
-                    $mUcs4 = ($in);
162
-                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
163
-                    $mState = 2;
164
-                    $mBytes = 3;
165
-                } elseif (0xF0 == (0xF8 & ($in))) {
166
-                    // First octet of 4 octet sequence
167
-                    $mUcs4 = ($in);
168
-                    $mUcs4 = ($mUcs4 & 0x07) << 18;
169
-                    $mState = 3;
170
-                    $mBytes = 4;
171
-                } elseif (0xF8 == (0xFC & ($in))) {
172
-                    // First octet of 5 octet sequence.
173
-                    //
174
-                    // This is illegal because the encoded codepoint must be
175
-                    // either:
176
-                    // (a) not the shortest form or
177
-                    // (b) outside the Unicode range of 0-0x10FFFF.
178
-                    // Rather than trying to resynchronize, we will carry on
179
-                    // until the end of the sequence and let the later error
180
-                    // handling code catch it.
181
-                    $mUcs4 = ($in);
182
-                    $mUcs4 = ($mUcs4 & 0x03) << 24;
183
-                    $mState = 4;
184
-                    $mBytes = 5;
185
-                } elseif (0xFC == (0xFE & ($in))) {
186
-                    // First octet of 6 octet sequence, see comments for 5
187
-                    // octet sequence.
188
-                    $mUcs4 = ($in);
189
-                    $mUcs4 = ($mUcs4 & 1) << 30;
190
-                    $mState = 5;
191
-                    $mBytes = 6;
192
-                } else {
193
-                    // Current octet is neither in the US-ASCII range nor a
194
-                    // legal first octet of a multi-octet sequence.
195
-                    $mState = 0;
196
-                    $mUcs4  = 0;
197
-                    $mBytes = 1;
198
-                    $char = '';
199
-                }
200
-            } else {
201
-                // When mState is non-zero, we expect a continuation of the
202
-                // multi-octet sequence
203
-                if (0x80 == (0xC0 & ($in))) {
204
-                    // Legal continuation.
205
-                    $shift = ($mState - 1) * 6;
206
-                    $tmp = $in;
207
-                    $tmp = ($tmp & 0x0000003F) << $shift;
208
-                    $mUcs4 |= $tmp;
209
-
210
-                    if (0 == --$mState) {
211
-                        // End of the multi-octet sequence. mUcs4 now contains
212
-                        // the final Unicode codepoint to be output
213
-
214
-                        // Check for illegal sequences and codepoints.
215
-
216
-                        // From Unicode 3.1, non-shortest form is illegal
217
-                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
218
-                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
219
-                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
220
-                            (4 < $mBytes) ||
221
-                            // From Unicode 3.2, surrogate characters = illegal
222
-                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
223
-                            // Codepoints outside the Unicode range are illegal
224
-                            ($mUcs4 > 0x10FFFF)
225
-                        ) {
226
-
227
-                        } elseif (0xFEFF != $mUcs4 && // omit BOM
228
-                            // check for valid Char unicode codepoints
229
-                            (
230
-                                0x9 == $mUcs4 ||
231
-                                0xA == $mUcs4 ||
232
-                                0xD == $mUcs4 ||
233
-                                (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
234
-                                // 7F-9F is not strictly prohibited by XML,
235
-                                // but it is non-SGML, and thus we don't allow it
236
-                                (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
237
-                                (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
238
-                            )
239
-                        ) {
240
-                            $out .= $char;
241
-                        }
242
-                        // initialize UTF8 cache (reset)
243
-                        $mState = 0;
244
-                        $mUcs4  = 0;
245
-                        $mBytes = 1;
246
-                        $char = '';
247
-                    }
248
-                } else {
249
-                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
250
-                    // Incomplete multi-octet sequence.
251
-                    // used to result in complete fail, but we'll reset
252
-                    $mState = 0;
253
-                    $mUcs4  = 0;
254
-                    $mBytes = 1;
255
-                    $char ='';
256
-                }
257
-            }
258
-        }
259
-        return $out;
260
-    }
261
-
262
-    /**
263
-     * Translates a Unicode codepoint into its corresponding UTF-8 character.
264
-     * @note Based on Feyd's function at
265
-     *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
266
-     *       which is in public domain.
267
-     * @note While we're going to do code point parsing anyway, a good
268
-     *       optimization would be to refuse to translate code points that
269
-     *       are non-SGML characters.  However, this could lead to duplication.
270
-     * @note This is very similar to the unichr function in
271
-     *       maintenance/generate-entity-file.php (although this is superior,
272
-     *       due to its sanity checks).
273
-     */
274
-
275
-    // +----------+----------+----------+----------+
276
-    // | 33222222 | 22221111 | 111111   |          |
277
-    // | 10987654 | 32109876 | 54321098 | 76543210 | bit
278
-    // +----------+----------+----------+----------+
279
-    // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
280
-    // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
281
-    // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
282
-    // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
283
-    // +----------+----------+----------+----------+
284
-    // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
285
-    // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
286
-    // +----------+----------+----------+----------+
287
-
288
-    public static function unichr($code) {
289
-        if($code > 1114111 or $code < 0 or
290
-          ($code >= 55296 and $code <= 57343) ) {
291
-            // bits are set outside the "valid" range as defined
292
-            // by UNICODE 4.1.0
293
-            return '';
294
-        }
295
-
296
-        $x = $y = $z = $w = 0;
297
-        if ($code < 128) {
298
-            // regular ASCII character
299
-            $x = $code;
300
-        } else {
301
-            // set up bits for UTF-8
302
-            $x = ($code & 63) | 128;
303
-            if ($code < 2048) {
304
-                $y = (($code & 2047) >> 6) | 192;
305
-            } else {
306
-                $y = (($code & 4032) >> 6) | 128;
307
-                if($code < 65536) {
308
-                    $z = (($code >> 12) & 15) | 224;
309
-                } else {
310
-                    $z = (($code >> 12) & 63) | 128;
311
-                    $w = (($code >> 18) & 7)  | 240;
312
-                }
313
-            }
314
-        }
315
-        // set up the actual character
316
-        $ret = '';
317
-        if($w) $ret .= chr($w);
318
-        if($z) $ret .= chr($z);
319
-        if($y) $ret .= chr($y);
320
-        $ret .= chr($x);
321
-
322
-        return $ret;
323
-    }
324
-
325
-    public static function iconvAvailable() {
326
-        static $iconv = null;
327
-        if ($iconv === null) {
328
-            $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
329
-        }
330
-        return $iconv;
331
-    }
332
-
333
-    /**
334
-     * Converts a string to UTF-8 based on configuration.
335
-     */
336
-    public static function convertToUTF8($str, $config, $context) {
337
-        $encoding = $config->get('Core.Encoding');
338
-        if ($encoding === 'utf-8') return $str;
339
-        static $iconv = null;
340
-        if ($iconv === null) $iconv = self::iconvAvailable();
341
-        if ($iconv && !$config->get('Test.ForceNoIconv')) {
342
-            // unaffected by bugs, since UTF-8 supports all characters
343
-            $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
344
-            if ($str === false) {
345
-                // $encoding is not a valid encoding
346
-                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
347
-                return '';
348
-            }
349
-            // If the string is bjorked by Shift_JIS or a similar encoding
350
-            // that doesn't support all of ASCII, convert the naughty
351
-            // characters to their true byte-wise ASCII/UTF-8 equivalents.
352
-            $str = strtr($str, self::testEncodingSupportsASCII($encoding));
353
-            return $str;
354
-        } elseif ($encoding === 'iso-8859-1') {
355
-            $str = utf8_encode($str);
356
-            return $str;
357
-        }
358
-        trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
359
-    }
360
-
361
-    /**
362
-     * Converts a string from UTF-8 based on configuration.
363
-     * @note Currently, this is a lossy conversion, with inexpressible
364
-     *       characters being omitted.
365
-     */
366
-    public static function convertFromUTF8($str, $config, $context) {
367
-        $encoding = $config->get('Core.Encoding');
368
-        if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
369
-            $str = self::convertToASCIIDumbLossless($str);
370
-        }
371
-        if ($encoding === 'utf-8') return $str;
372
-        static $iconv = null;
373
-        if ($iconv === null) $iconv = self::iconvAvailable();
374
-        if ($iconv && !$config->get('Test.ForceNoIconv')) {
375
-            // Undo our previous fix in convertToUTF8, otherwise iconv will barf
376
-            $ascii_fix = self::testEncodingSupportsASCII($encoding);
377
-            if (!$escape && !empty($ascii_fix)) {
378
-                $clear_fix = array();
379
-                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
380
-                $str = strtr($str, $clear_fix);
381
-            }
382
-            $str = strtr($str, array_flip($ascii_fix));
383
-            // Normal stuff
384
-            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
385
-            return $str;
386
-        } elseif ($encoding === 'iso-8859-1') {
387
-            $str = utf8_decode($str);
388
-            return $str;
389
-        }
390
-        trigger_error('Encoding not supported', E_USER_ERROR);
391
-        // You might be tempted to assume that the ASCII representation
392
-        // might be OK, however, this is *not* universally true over all
393
-        // encodings.  So we take the conservative route here, rather
394
-        // than forcibly turn on %Core.EscapeNonASCIICharacters
395
-    }
396
-
397
-    /**
398
-     * Lossless (character-wise) conversion of HTML to ASCII
399
-     * @param $str UTF-8 string to be converted to ASCII
400
-     * @returns ASCII encoded string with non-ASCII character entity-ized
401
-     * @warning Adapted from MediaWiki, claiming fair use: this is a common
402
-     *       algorithm. If you disagree with this license fudgery,
403
-     *       implement it yourself.
404
-     * @note Uses decimal numeric entities since they are best supported.
405
-     * @note This is a DUMB function: it has no concept of keeping
406
-     *       character entities that the projected character encoding
407
-     *       can allow. We could possibly implement a smart version
408
-     *       but that would require it to also know which Unicode
409
-     *       codepoints the charset supported (not an easy task).
410
-     * @note Sort of like cleanUTF8(), but it assumes that $str is
411
-     *       well-formed UTF-8
412
-     */
413
-    public static function convertToASCIIDumbLossless($str) {
414
-        $bytesleft = 0;
415
-        $result = '';
416
-        $working = 0;
417
-        $len = strlen($str);
418
-        for( $i = 0; $i < $len; $i++ ) {
419
-            $bytevalue = ord( $str[$i] );
420
-            if( $bytevalue <= 0x7F ) { //0xxx xxxx
421
-                $result .= chr( $bytevalue );
422
-                $bytesleft = 0;
423
-            } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
424
-                $working = $working << 6;
425
-                $working += ($bytevalue & 0x3F);
426
-                $bytesleft--;
427
-                if( $bytesleft <= 0 ) {
428
-                    $result .= "&#" . $working . ";";
429
-                }
430
-            } elseif( $bytevalue <= 0xDF ) { //110x xxxx
431
-                $working = $bytevalue & 0x1F;
432
-                $bytesleft = 1;
433
-            } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
434
-                $working = $bytevalue & 0x0F;
435
-                $bytesleft = 2;
436
-            } else { //1111 0xxx
437
-                $working = $bytevalue & 0x07;
438
-                $bytesleft = 3;
439
-            }
440
-        }
441
-        return $result;
442
-    }
443
-
444
-    /** No bugs detected in iconv. */
445
-    const ICONV_OK = 0;
446
-
447
-    /** Iconv truncates output if converting from UTF-8 to another
448
-     *  character set with //IGNORE, and a non-encodable character is found */
449
-    const ICONV_TRUNCATES = 1;
450
-
451
-    /** Iconv does not support //IGNORE, making it unusable for
452
-     *  transcoding purposes */
453
-    const ICONV_UNUSABLE = 2;
454
-
455
-    /**
456
-     * glibc iconv has a known bug where it doesn't handle the magic
457
-     * //IGNORE stanza correctly.  In particular, rather than ignore
458
-     * characters, it will return an EILSEQ after consuming some number
459
-     * of characters, and expect you to restart iconv as if it were
460
-     * an E2BIG.  Old versions of PHP did not respect the errno, and
461
-     * returned the fragment, so as a result you would see iconv
462
-     * mysteriously truncating output. We can work around this by
463
-     * manually chopping our input into segments of about 8000
464
-     * characters, as long as PHP ignores the error code.  If PHP starts
465
-     * paying attention to the error code, iconv becomes unusable.
466
-     *
467
-     * @returns Error code indicating severity of bug.
468
-     */
469
-    public static function testIconvTruncateBug() {
470
-        static $code = null;
471
-        if ($code === null) {
472
-            // better not use iconv, otherwise infinite loop!
473
-            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
474
-            if ($r === false) {
475
-                $code = self::ICONV_UNUSABLE;
476
-            } elseif (($c = strlen($r)) < 9000) {
477
-                $code = self::ICONV_TRUNCATES;
478
-            } elseif ($c > 9000) {
479
-                trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
480
-            } else {
481
-                $code = self::ICONV_OK;
482
-            }
483
-        }
484
-        return $code;
485
-    }
486
-
487
-    /**
488
-     * This expensive function tests whether or not a given character
489
-     * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
490
-     * fail this test, and require special processing. Variable width
491
-     * encodings shouldn't ever fail.
492
-     *
493
-     * @param string $encoding Encoding name to test, as per iconv format
494
-     * @param bool $bypass Whether or not to bypass the precompiled arrays.
495
-     * @return Array of UTF-8 characters to their corresponding ASCII,
496
-     *      which can be used to "undo" any overzealous iconv action.
497
-     */
498
-    public static function testEncodingSupportsASCII($encoding, $bypass = false) {
499
-        // All calls to iconv here are unsafe, proof by case analysis:
500
-        // If ICONV_OK, no difference.
501
-        // If ICONV_TRUNCATES, all calls involve one character inputs,
502
-        // so bug is not triggered.
503
-        // If ICONV_UNUSABLE, this call is irrelevant
504
-        static $encodings = array();
505
-        if (!$bypass) {
506
-            if (isset($encodings[$encoding])) return $encodings[$encoding];
507
-            $lenc = strtolower($encoding);
508
-            switch ($lenc) {
509
-                case 'shift_jis':
510
-                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
511
-                case 'johab':
512
-                    return array("\xE2\x82\xA9" => '\\');
513
-            }
514
-            if (strpos($lenc, 'iso-8859-') === 0) return array();
515
-        }
516
-        $ret = array();
517
-        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
518
-        for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
519
-            $c = chr($i); // UTF-8 char
520
-            $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
521
-            if (
522
-                $r === '' ||
523
-                // This line is needed for iconv implementations that do not
524
-                // omit characters that do not exist in the target character set
525
-                ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
526
-            ) {
527
-                // Reverse engineer: what's the UTF-8 equiv of this byte
528
-                // sequence? This assumes that there's no variable width
529
-                // encoding that doesn't support ASCII.
530
-                $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
531
-            }
532
-        }
533
-        $encodings[$encoding] = $ret;
534
-        return $ret;
535
-    }
10
+	/**
11
+	 * Constructor throws fatal error if you attempt to instantiate class
12
+	 */
13
+	private function __construct() {
14
+		trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
15
+	}
16
+
17
+	/**
18
+	 * Error-handler that mutes errors, alternative to shut-up operator.
19
+	 */
20
+	public static function muteErrorHandler() {}
21
+
22
+	/**
23
+	 * iconv wrapper which mutes errors, but doesn't work around bugs.
24
+	 */
25
+	public static function unsafeIconv($in, $out, $text) {
26
+		set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
27
+		$r = iconv($in, $out, $text);
28
+		restore_error_handler();
29
+		return $r;
30
+	}
31
+
32
+	/**
33
+	 * iconv wrapper which mutes errors and works around bugs.
34
+	 */
35
+	public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
36
+		$code = self::testIconvTruncateBug();
37
+		if ($code == self::ICONV_OK) {
38
+			return self::unsafeIconv($in, $out, $text);
39
+		} elseif ($code == self::ICONV_TRUNCATES) {
40
+			// we can only work around this if the input character set
41
+			// is utf-8
42
+			if ($in == 'utf-8') {
43
+				if ($max_chunk_size < 4) {
44
+					trigger_error('max_chunk_size is too small', E_USER_WARNING);
45
+					return false;
46
+				}
47
+				// split into 8000 byte chunks, but be careful to handle
48
+				// multibyte boundaries properly
49
+				if (($c = strlen($text)) <= $max_chunk_size) {
50
+					return self::unsafeIconv($in, $out, $text);
51
+				}
52
+				$r = '';
53
+				$i = 0;
54
+				while (true) {
55
+					if ($i + $max_chunk_size >= $c) {
56
+						$r .= self::unsafeIconv($in, $out, substr($text, $i));
57
+						break;
58
+					}
59
+					// wibble the boundary
60
+					if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
61
+						$chunk_size = $max_chunk_size;
62
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
63
+						$chunk_size = $max_chunk_size - 1;
64
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
65
+						$chunk_size = $max_chunk_size - 2;
66
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
67
+						$chunk_size = $max_chunk_size - 3;
68
+					} else {
69
+						return false; // rather confusing UTF-8...
70
+					}
71
+					$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
72
+					$r .= self::unsafeIconv($in, $out, $chunk);
73
+					$i += $chunk_size;
74
+				}
75
+				return $r;
76
+			} else {
77
+				return false;
78
+			}
79
+		} else {
80
+			return false;
81
+		}
82
+	}
83
+
84
+	/**
85
+	 * Cleans a UTF-8 string for well-formedness and SGML validity
86
+	 *
87
+	 * It will parse according to UTF-8 and return a valid UTF8 string, with
88
+	 * non-SGML codepoints excluded.
89
+	 *
90
+	 * @note Just for reference, the non-SGML code points are 0 to 31 and
91
+	 *       127 to 159, inclusive.  However, we allow code points 9, 10
92
+	 *       and 13, which are the tab, line feed and carriage return
93
+	 *       respectively. 128 and above the code points map to multibyte
94
+	 *       UTF-8 representations.
95
+	 *
96
+	 * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
97
+	 *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
98
+	 *       LGPL license.  Notes on what changed are inside, but in general,
99
+	 *       the original code transformed UTF-8 text into an array of integer
100
+	 *       Unicode codepoints. Understandably, transforming that back to
101
+	 *       a string would be somewhat expensive, so the function was modded to
102
+	 *       directly operate on the string.  However, this discourages code
103
+	 *       reuse, and the logic enumerated here would be useful for any
104
+	 *       function that needs to be able to understand UTF-8 characters.
105
+	 *       As of right now, only smart lossless character encoding converters
106
+	 *       would need that, and I'm probably not going to implement them.
107
+	 *       Once again, PHP 6 should solve all our problems.
108
+	 */
109
+	public static function cleanUTF8($str, $force_php = false) {
110
+
111
+		// UTF-8 validity is checked since PHP 4.3.5
112
+		// This is an optimization: if the string is already valid UTF-8, no
113
+		// need to do PHP stuff. 99% of the time, this will be the case.
114
+		// The regexp matches the XML char production, as well as excluding
115
+		// non-SGML codepoints U+007F to U+009F
116
+		if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
117
+			return $str;
118
+		}
119
+
120
+		$mState = 0; // cached expected number of octets after the current octet
121
+					 // until the beginning of the next UTF8 character sequence
122
+		$mUcs4  = 0; // cached Unicode character
123
+		$mBytes = 1; // cached expected number of octets in the current sequence
124
+
125
+		// original code involved an $out that was an array of Unicode
126
+		// codepoints.  Instead of having to convert back into UTF-8, we've
127
+		// decided to directly append valid UTF-8 characters onto a string
128
+		// $out once they're done.  $char accumulates raw bytes, while $mUcs4
129
+		// turns into the Unicode code point, so there's some redundancy.
130
+
131
+		$out = '';
132
+		$char = '';
133
+
134
+		$len = strlen($str);
135
+		for($i = 0; $i < $len; $i++) {
136
+			$in = ord($str{$i});
137
+			$char .= $str[$i]; // append byte to char
138
+			if (0 == $mState) {
139
+				// When mState is zero we expect either a US-ASCII character
140
+				// or a multi-octet sequence.
141
+				if (0 == (0x80 & ($in))) {
142
+					// US-ASCII, pass straight through.
143
+					if (($in <= 31 || $in == 127) &&
144
+						!($in == 9 || $in == 13 || $in == 10) // save \r\t\n
145
+					) {
146
+						// control characters, remove
147
+					} else {
148
+						$out .= $char;
149
+					}
150
+					// reset
151
+					$char = '';
152
+					$mBytes = 1;
153
+				} elseif (0xC0 == (0xE0 & ($in))) {
154
+					// First octet of 2 octet sequence
155
+					$mUcs4 = ($in);
156
+					$mUcs4 = ($mUcs4 & 0x1F) << 6;
157
+					$mState = 1;
158
+					$mBytes = 2;
159
+				} elseif (0xE0 == (0xF0 & ($in))) {
160
+					// First octet of 3 octet sequence
161
+					$mUcs4 = ($in);
162
+					$mUcs4 = ($mUcs4 & 0x0F) << 12;
163
+					$mState = 2;
164
+					$mBytes = 3;
165
+				} elseif (0xF0 == (0xF8 & ($in))) {
166
+					// First octet of 4 octet sequence
167
+					$mUcs4 = ($in);
168
+					$mUcs4 = ($mUcs4 & 0x07) << 18;
169
+					$mState = 3;
170
+					$mBytes = 4;
171
+				} elseif (0xF8 == (0xFC & ($in))) {
172
+					// First octet of 5 octet sequence.
173
+					//
174
+					// This is illegal because the encoded codepoint must be
175
+					// either:
176
+					// (a) not the shortest form or
177
+					// (b) outside the Unicode range of 0-0x10FFFF.
178
+					// Rather than trying to resynchronize, we will carry on
179
+					// until the end of the sequence and let the later error
180
+					// handling code catch it.
181
+					$mUcs4 = ($in);
182
+					$mUcs4 = ($mUcs4 & 0x03) << 24;
183
+					$mState = 4;
184
+					$mBytes = 5;
185
+				} elseif (0xFC == (0xFE & ($in))) {
186
+					// First octet of 6 octet sequence, see comments for 5
187
+					// octet sequence.
188
+					$mUcs4 = ($in);
189
+					$mUcs4 = ($mUcs4 & 1) << 30;
190
+					$mState = 5;
191
+					$mBytes = 6;
192
+				} else {
193
+					// Current octet is neither in the US-ASCII range nor a
194
+					// legal first octet of a multi-octet sequence.
195
+					$mState = 0;
196
+					$mUcs4  = 0;
197
+					$mBytes = 1;
198
+					$char = '';
199
+				}
200
+			} else {
201
+				// When mState is non-zero, we expect a continuation of the
202
+				// multi-octet sequence
203
+				if (0x80 == (0xC0 & ($in))) {
204
+					// Legal continuation.
205
+					$shift = ($mState - 1) * 6;
206
+					$tmp = $in;
207
+					$tmp = ($tmp & 0x0000003F) << $shift;
208
+					$mUcs4 |= $tmp;
209
+
210
+					if (0 == --$mState) {
211
+						// End of the multi-octet sequence. mUcs4 now contains
212
+						// the final Unicode codepoint to be output
213
+
214
+						// Check for illegal sequences and codepoints.
215
+
216
+						// From Unicode 3.1, non-shortest form is illegal
217
+						if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
218
+							((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
219
+							((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
220
+							(4 < $mBytes) ||
221
+							// From Unicode 3.2, surrogate characters = illegal
222
+							(($mUcs4 & 0xFFFFF800) == 0xD800) ||
223
+							// Codepoints outside the Unicode range are illegal
224
+							($mUcs4 > 0x10FFFF)
225
+						) {
226
+
227
+						} elseif (0xFEFF != $mUcs4 && // omit BOM
228
+							// check for valid Char unicode codepoints
229
+							(
230
+								0x9 == $mUcs4 ||
231
+								0xA == $mUcs4 ||
232
+								0xD == $mUcs4 ||
233
+								(0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
234
+								// 7F-9F is not strictly prohibited by XML,
235
+								// but it is non-SGML, and thus we don't allow it
236
+								(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
237
+								(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
238
+							)
239
+						) {
240
+							$out .= $char;
241
+						}
242
+						// initialize UTF8 cache (reset)
243
+						$mState = 0;
244
+						$mUcs4  = 0;
245
+						$mBytes = 1;
246
+						$char = '';
247
+					}
248
+				} else {
249
+					// ((0xC0 & (*in) != 0x80) && (mState != 0))
250
+					// Incomplete multi-octet sequence.
251
+					// used to result in complete fail, but we'll reset
252
+					$mState = 0;
253
+					$mUcs4  = 0;
254
+					$mBytes = 1;
255
+					$char ='';
256
+				}
257
+			}
258
+		}
259
+		return $out;
260
+	}
261
+
262
+	/**
263
+	 * Translates a Unicode codepoint into its corresponding UTF-8 character.
264
+	 * @note Based on Feyd's function at
265
+	 *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
266
+	 *       which is in public domain.
267
+	 * @note While we're going to do code point parsing anyway, a good
268
+	 *       optimization would be to refuse to translate code points that
269
+	 *       are non-SGML characters.  However, this could lead to duplication.
270
+	 * @note This is very similar to the unichr function in
271
+	 *       maintenance/generate-entity-file.php (although this is superior,
272
+	 *       due to its sanity checks).
273
+	 */
274
+
275
+	// +----------+----------+----------+----------+
276
+	// | 33222222 | 22221111 | 111111   |          |
277
+	// | 10987654 | 32109876 | 54321098 | 76543210 | bit
278
+	// +----------+----------+----------+----------+
279
+	// |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
280
+	// |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
281
+	// |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
282
+	// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
283
+	// +----------+----------+----------+----------+
284
+	// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
285
+	// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
286
+	// +----------+----------+----------+----------+
287
+
288
+	public static function unichr($code) {
289
+		if($code > 1114111 or $code < 0 or
290
+		  ($code >= 55296 and $code <= 57343) ) {
291
+			// bits are set outside the "valid" range as defined
292
+			// by UNICODE 4.1.0
293
+			return '';
294
+		}
295
+
296
+		$x = $y = $z = $w = 0;
297
+		if ($code < 128) {
298
+			// regular ASCII character
299
+			$x = $code;
300
+		} else {
301
+			// set up bits for UTF-8
302
+			$x = ($code & 63) | 128;
303
+			if ($code < 2048) {
304
+				$y = (($code & 2047) >> 6) | 192;
305
+			} else {
306
+				$y = (($code & 4032) >> 6) | 128;
307
+				if($code < 65536) {
308
+					$z = (($code >> 12) & 15) | 224;
309
+				} else {
310
+					$z = (($code >> 12) & 63) | 128;
311
+					$w = (($code >> 18) & 7)  | 240;
312
+				}
313
+			}
314
+		}
315
+		// set up the actual character
316
+		$ret = '';
317
+		if($w) $ret .= chr($w);
318
+		if($z) $ret .= chr($z);
319
+		if($y) $ret .= chr($y);
320
+		$ret .= chr($x);
321
+
322
+		return $ret;
323
+	}
324
+
325
+	public static function iconvAvailable() {
326
+		static $iconv = null;
327
+		if ($iconv === null) {
328
+			$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
329
+		}
330
+		return $iconv;
331
+	}
332
+
333
+	/**
334
+	 * Converts a string to UTF-8 based on configuration.
335
+	 */
336
+	public static function convertToUTF8($str, $config, $context) {
337
+		$encoding = $config->get('Core.Encoding');
338
+		if ($encoding === 'utf-8') return $str;
339
+		static $iconv = null;
340
+		if ($iconv === null) $iconv = self::iconvAvailable();
341
+		if ($iconv && !$config->get('Test.ForceNoIconv')) {
342
+			// unaffected by bugs, since UTF-8 supports all characters
343
+			$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
344
+			if ($str === false) {
345
+				// $encoding is not a valid encoding
346
+				trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
347
+				return '';
348
+			}
349
+			// If the string is bjorked by Shift_JIS or a similar encoding
350
+			// that doesn't support all of ASCII, convert the naughty
351
+			// characters to their true byte-wise ASCII/UTF-8 equivalents.
352
+			$str = strtr($str, self::testEncodingSupportsASCII($encoding));
353
+			return $str;
354
+		} elseif ($encoding === 'iso-8859-1') {
355
+			$str = utf8_encode($str);
356
+			return $str;
357
+		}
358
+		trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
359
+	}
360
+
361
+	/**
362
+	 * Converts a string from UTF-8 based on configuration.
363
+	 * @note Currently, this is a lossy conversion, with inexpressible
364
+	 *       characters being omitted.
365
+	 */
366
+	public static function convertFromUTF8($str, $config, $context) {
367
+		$encoding = $config->get('Core.Encoding');
368
+		if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
369
+			$str = self::convertToASCIIDumbLossless($str);
370
+		}
371
+		if ($encoding === 'utf-8') return $str;
372
+		static $iconv = null;
373
+		if ($iconv === null) $iconv = self::iconvAvailable();
374
+		if ($iconv && !$config->get('Test.ForceNoIconv')) {
375
+			// Undo our previous fix in convertToUTF8, otherwise iconv will barf
376
+			$ascii_fix = self::testEncodingSupportsASCII($encoding);
377
+			if (!$escape && !empty($ascii_fix)) {
378
+				$clear_fix = array();
379
+				foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
380
+				$str = strtr($str, $clear_fix);
381
+			}
382
+			$str = strtr($str, array_flip($ascii_fix));
383
+			// Normal stuff
384
+			$str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
385
+			return $str;
386
+		} elseif ($encoding === 'iso-8859-1') {
387
+			$str = utf8_decode($str);
388
+			return $str;
389
+		}
390
+		trigger_error('Encoding not supported', E_USER_ERROR);
391
+		// You might be tempted to assume that the ASCII representation
392
+		// might be OK, however, this is *not* universally true over all
393
+		// encodings.  So we take the conservative route here, rather
394
+		// than forcibly turn on %Core.EscapeNonASCIICharacters
395
+	}
396
+
397
+	/**
398
+	 * Lossless (character-wise) conversion of HTML to ASCII
399
+	 * @param $str UTF-8 string to be converted to ASCII
400
+	 * @returns ASCII encoded string with non-ASCII character entity-ized
401
+	 * @warning Adapted from MediaWiki, claiming fair use: this is a common
402
+	 *       algorithm. If you disagree with this license fudgery,
403
+	 *       implement it yourself.
404
+	 * @note Uses decimal numeric entities since they are best supported.
405
+	 * @note This is a DUMB function: it has no concept of keeping
406
+	 *       character entities that the projected character encoding
407
+	 *       can allow. We could possibly implement a smart version
408
+	 *       but that would require it to also know which Unicode
409
+	 *       codepoints the charset supported (not an easy task).
410
+	 * @note Sort of with cleanUTF8() but it assumes that $str is
411
+	 *       well-formed UTF-8
412
+	 */
413
+	public static function convertToASCIIDumbLossless($str) {
414
+		$bytesleft = 0;
415
+		$result = '';
416
+		$working = 0;
417
+		$len = strlen($str);
418
+		for( $i = 0; $i < $len; $i++ ) {
419
+			$bytevalue = ord( $str[$i] );
420
+			if( $bytevalue <= 0x7F ) { //0xxx xxxx
421
+				$result .= chr( $bytevalue );
422
+				$bytesleft = 0;
423
+			} elseif( $bytevalue <= 0xBF ) { //10xx xxxx
424
+				$working = $working << 6;
425
+				$working += ($bytevalue & 0x3F);
426
+				$bytesleft--;
427
+				if( $bytesleft <= 0 ) {
428
+					$result .= "&#" . $working . ";";
429
+				}
430
+			} elseif( $bytevalue <= 0xDF ) { //110x xxxx
431
+				$working = $bytevalue & 0x1F;
432
+				$bytesleft = 1;
433
+			} elseif( $bytevalue <= 0xEF ) { //1110 xxxx
434
+				$working = $bytevalue & 0x0F;
435
+				$bytesleft = 2;
436
+			} else { //1111 0xxx
437
+				$working = $bytevalue & 0x07;
438
+				$bytesleft = 3;
439
+			}
440
+		}
441
+		return $result;
442
+	}
443
+
444
+	/** No bugs detected in iconv. */
445
+	const ICONV_OK = 0;
446
+
447
+	/** Iconv truncates output if converting from UTF-8 to another
448
+	 *  character set with //IGNORE, and a non-encodable character is found */
449
+	const ICONV_TRUNCATES = 1;
450
+
451
+	/** Iconv does not support //IGNORE, making it unusable for
452
+	 *  transcoding purposes */
453
+	const ICONV_UNUSABLE = 2;
454
+
455
+	/**
456
+	 * glibc iconv has a known bug where it doesn't handle the magic
457
+	 * //IGNORE stanza correctly.  In particular, rather than ignore
458
+	 * characters, it will return an EILSEQ after consuming some number
459
+	 * of characters, and expect you to restart iconv as if it were
460
+	 * an E2BIG.  Old versions of PHP did not respect the errno, and
461
+	 * returned the fragment, so as a result you would see iconv
462
+	 * mysteriously truncating output. We can work around this by
463
+	 * manually chopping our input into segments of about 8000
464
+	 * characters, as long as PHP ignores the error code.  If PHP starts
465
+	 * paying attention to the error code, iconv becomes unusable.
466
+	 *
467
+	 * @returns Error code indicating severity of bug.
468
+	 */
469
+	public static function testIconvTruncateBug() {
470
+		static $code = null;
471
+		if ($code === null) {
472
+			// better not use iconv, otherwise infinite loop!
473
+			$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
474
+			if ($r === false) {
475
+				$code = self::ICONV_UNUSABLE;
476
+			} elseif (($c = strlen($r)) < 9000) {
477
+				$code = self::ICONV_TRUNCATES;
478
+			} elseif ($c > 9000) {
479
+				trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
480
+			} else {
481
+				$code = self::ICONV_OK;
482
+			}
483
+		}
484
+		return $code;
485
+	}
486
+
487
+	/**
488
+	 * This expensive function tests whether or not a given character
489
+	 * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
490
+	 * fail this test, and require special processing. Variable width
491
+	 * encodings shouldn't ever fail.
492
+	 *
493
+	 * @param string $encoding Encoding name to test, as per iconv format
494
+	 * @param bool $bypass Whether or not to bypass the precompiled arrays.
495
+	 * @return Array of UTF-8 characters to their corresponding ASCII,
496
+	 *      which can be used to "undo" any overzealous iconv action.
497
+	 */
498
+	public static function testEncodingSupportsASCII($encoding, $bypass = false) {
499
+		// All calls to iconv here are unsafe, proof by case analysis:
500
+		// If ICONV_OK, no difference.
501
+		// If ICONV_TRUNCATE, all calls involve one character inputs,
502
+		// so bug is not triggered.
503
+		// If ICONV_UNUSABLE, this call is irrelevant
504
+		static $encodings = array();
505
+		if (!$bypass) {
506
+			if (isset($encodings[$encoding])) return $encodings[$encoding];
507
+			$lenc = strtolower($encoding);
508
+			switch ($lenc) {
509
+				case 'shift_jis':
510
+					return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
511
+				case 'johab':
512
+					return array("\xE2\x82\xA9" => '\\');
513
+			}
514
+			if (strpos($lenc, 'iso-8859-') === 0) return array();
515
+		}
516
+		$ret = array();
517
+		if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
518
+		for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
519
+			$c = chr($i); // UTF-8 char
520
+			$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
521
+			if (
522
+				$r === '' ||
523
+				// This line is needed for iconv implementations that do not
524
+				// omit characters that do not exist in the target character set
525
+				($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
526
+			) {
527
+				// Reverse engineer: what's the UTF-8 equiv of this byte
528
+				// sequence? This assumes that there's no variable width
529
+				// encoding that doesn't support ASCII.
530
+				$ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
531
+			}
532
+		}
533
+		$encodings[$encoding] = $ret;
534
+		return $ret;
535
+	}
536 536
 
537 537
 
538 538
 }
Please login to merge, or discard this patch.
Braces   +33 added lines, -11 removed lines patch added patch discarded remove patch
@@ -314,9 +314,15 @@  discard block
 block discarded – undo
314 314
         }
315 315
         // set up the actual character
316 316
         $ret = '';
317
-        if($w) $ret .= chr($w);
318
-        if($z) $ret .= chr($z);
319
-        if($y) $ret .= chr($y);
317
+        if($w) {
318
+        	$ret .= chr($w);
319
+        }
320
+        if($z) {
321
+        	$ret .= chr($z);
322
+        }
323
+        if($y) {
324
+        	$ret .= chr($y);
325
+        }
320 326
         $ret .= chr($x);
321 327
 
322 328
         return $ret;
@@ -335,9 +341,13 @@  discard block
 block discarded – undo
335 341
      */
336 342
     public static function convertToUTF8($str, $config, $context) {
337 343
         $encoding = $config->get('Core.Encoding');
338
-        if ($encoding === 'utf-8') return $str;
344
+        if ($encoding === 'utf-8') {
345
+        	return $str;
346
+        }
339 347
         static $iconv = null;
340
-        if ($iconv === null) $iconv = self::iconvAvailable();
348
+        if ($iconv === null) {
349
+        	$iconv = self::iconvAvailable();
350
+        }
341 351
         if ($iconv && !$config->get('Test.ForceNoIconv')) {
342 352
             // unaffected by bugs, since UTF-8 support all characters
343 353
             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
@@ -368,15 +378,21 @@  discard block
 block discarded – undo
368 378
         if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
369 379
             $str = self::convertToASCIIDumbLossless($str);
370 380
         }
371
-        if ($encoding === 'utf-8') return $str;
381
+        if ($encoding === 'utf-8') {
382
+        	return $str;
383
+        }
372 384
         static $iconv = null;
373
-        if ($iconv === null) $iconv = self::iconvAvailable();
385
+        if ($iconv === null) {
386
+        	$iconv = self::iconvAvailable();
387
+        }
374 388
         if ($iconv && !$config->get('Test.ForceNoIconv')) {
375 389
             // Undo our previous fix in convertToUTF8, otherwise iconv will barf
376 390
             $ascii_fix = self::testEncodingSupportsASCII($encoding);
377 391
             if (!$escape && !empty($ascii_fix)) {
378 392
                 $clear_fix = array();
379
-                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
393
+                foreach ($ascii_fix as $utf8 => $native) {
394
+                	$clear_fix[$utf8] = '';
395
+                }
380 396
                 $str = strtr($str, $clear_fix);
381 397
             }
382 398
             $str = strtr($str, array_flip($ascii_fix));
@@ -503,7 +519,9 @@  discard block
 block discarded – undo
503 519
         // If ICONV_UNUSABLE, this call is irrelevant
504 520
         static $encodings = array();
505 521
         if (!$bypass) {
506
-            if (isset($encodings[$encoding])) return $encodings[$encoding];
522
+            if (isset($encodings[$encoding])) {
523
+            	return $encodings[$encoding];
524
+            }
507 525
             $lenc = strtolower($encoding);
508 526
             switch ($lenc) {
509 527
                 case 'shift_jis':
@@ -511,10 +529,14 @@  discard block
 block discarded – undo
511 529
                 case 'johab':
512 530
                     return array("\xE2\x82\xA9" => '\\');
513 531
             }
514
-            if (strpos($lenc, 'iso-8859-') === 0) return array();
532
+            if (strpos($lenc, 'iso-8859-') === 0) {
533
+            	return array();
534
+            }
515 535
         }
516 536
         $ret = array();
517
-        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
537
+        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
538
+        	return false;
539
+        }
518 540
         for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
519 541
             $c = chr($i); // UTF-8 char
520 542
             $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
Please login to merge, or discard this patch.
Spacing   +21 added lines, -21 removed lines patch added patch discarded remove patch
@@ -132,7 +132,7 @@  discard block
 block discarded – undo
132 132
         $char = '';
133 133
 
134 134
         $len = strlen($str);
135
-        for($i = 0; $i < $len; $i++) {
135
+        for ($i = 0; $i < $len; $i++) {
136 136
             $in = ord($str{$i});
137 137
             $char .= $str[$i]; // append byte to char
138 138
             if (0 == $mState) {
@@ -252,7 +252,7 @@  discard block
 block discarded – undo
252 252
                     $mState = 0;
253 253
                     $mUcs4  = 0;
254 254
                     $mBytes = 1;
255
-                    $char ='';
255
+                    $char = '';
256 256
                 }
257 257
             }
258 258
         }
@@ -286,8 +286,8 @@  discard block
 block discarded – undo
286 286
     // +----------+----------+----------+----------+
287 287
 
288 288
     public static function unichr($code) {
289
-        if($code > 1114111 or $code < 0 or
290
-          ($code >= 55296 and $code <= 57343) ) {
289
+        if ($code > 1114111 or $code < 0 or
290
+          ($code >= 55296 and $code <= 57343)) {
291 291
             // bits are set outside the "valid" range as defined
292 292
             // by UNICODE 4.1.0
293 293
             return '';
@@ -304,19 +304,19 @@  discard block
 block discarded – undo
304 304
                 $y = (($code & 2047) >> 6) | 192;
305 305
             } else {
306 306
                 $y = (($code & 4032) >> 6) | 128;
307
-                if($code < 65536) {
307
+                if ($code < 65536) {
308 308
                     $z = (($code >> 12) & 15) | 224;
309 309
                 } else {
310 310
                     $z = (($code >> 12) & 63) | 128;
311
-                    $w = (($code >> 18) & 7)  | 240;
311
+                    $w = (($code >> 18) & 7) | 240;
312 312
                 }
313 313
             }
314 314
         }
315 315
         // set up the actual character
316 316
         $ret = '';
317
-        if($w) $ret .= chr($w);
318
-        if($z) $ret .= chr($z);
319
-        if($y) $ret .= chr($y);
317
+        if ($w) $ret .= chr($w);
318
+        if ($z) $ret .= chr($z);
319
+        if ($y) $ret .= chr($y);
320 320
         $ret .= chr($x);
321 321
 
322 322
         return $ret;
@@ -343,7 +343,7 @@  discard block
 block discarded – undo
343 343
             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
344 344
             if ($str === false) {
345 345
                 // $encoding is not a valid encoding
346
-                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
346
+                trigger_error('Invalid encoding '.$encoding, E_USER_ERROR);
347 347
                 return '';
348 348
             }
349 349
             // If the string is bjorked by Shift_JIS or a similar encoding
@@ -381,7 +381,7 @@  discard block
 block discarded – undo
381 381
             }
382 382
             $str = strtr($str, array_flip($ascii_fix));
383 383
             // Normal stuff
384
-            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
384
+            $str = self::iconv('utf-8', $encoding.'//IGNORE', $str);
385 385
             return $str;
386 386
         } elseif ($encoding === 'iso-8859-1') {
387 387
             $str = utf8_decode($str);
@@ -415,22 +415,22 @@  discard block
 block discarded – undo
415 415
         $result = '';
416 416
         $working = 0;
417 417
         $len = strlen($str);
418
-        for( $i = 0; $i < $len; $i++ ) {
419
-            $bytevalue = ord( $str[$i] );
420
-            if( $bytevalue <= 0x7F ) { //0xxx xxxx
421
-                $result .= chr( $bytevalue );
418
+        for ($i = 0; $i < $len; $i++) {
419
+            $bytevalue = ord($str[$i]);
420
+            if ($bytevalue <= 0x7F) { //0xxx xxxx
421
+                $result .= chr($bytevalue);
422 422
                 $bytesleft = 0;
423
-            } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
423
+            } elseif ($bytevalue <= 0xBF) { //10xx xxxx
424 424
                 $working = $working << 6;
425 425
                 $working += ($bytevalue & 0x3F);
426 426
                 $bytesleft--;
427
-                if( $bytesleft <= 0 ) {
428
-                    $result .= "&#" . $working . ";";
427
+                if ($bytesleft <= 0) {
428
+                    $result .= "&#".$working.";";
429 429
                 }
430
-            } elseif( $bytevalue <= 0xDF ) { //110x xxxx
430
+            } elseif ($bytevalue <= 0xDF) { //110x xxxx
431 431
                 $working = $bytevalue & 0x1F;
432 432
                 $bytesleft = 1;
433
-            } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
433
+            } elseif ($bytevalue <= 0xEF) { //1110 xxxx
434 434
                 $working = $bytevalue & 0x0F;
435 435
                 $bytesleft = 2;
436 436
             } else { //1111 0xxx
@@ -470,7 +470,7 @@  discard block
 block discarded – undo
470 470
         static $code = null;
471 471
         if ($code === null) {
472 472
             // better not use iconv, otherwise infinite loop!
473
-            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
473
+            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1".str_repeat('a', 9000));
474 474
             if ($r === false) {
475 475
                 $code = self::ICONV_UNUSABLE;
476 476
             } elseif (($c = strlen($r)) < 9000) {
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/ErrorCollector.php 4 patches
Doc Comments   +3 added lines patch added patch discarded remove patch
@@ -25,6 +25,9 @@
 block discarded – undo
25 25
 
26 26
     protected $lines = array();
27 27
 
28
+    /**
29
+     * @param HTMLPurifier_Context $context
30
+     */
28 31
     public function __construct($context) {
29 32
         $this->locale    =& $context->get('Locale');
30 33
         $this->context   = $context;
Please login to merge, or discard this patch.
Indentation   +196 added lines, -196 removed lines patch added patch discarded remove patch
@@ -7,202 +7,202 @@
 block discarded – undo
7 7
 class HTMLPurifier_ErrorCollector
8 8
 {
9 9
 
10
-    /**
11
-     * Identifiers for the returned error array. These are purposely numeric
12
-     * so list() can be used.
13
-     */
14
-    const LINENO   = 0;
15
-    const SEVERITY = 1;
16
-    const MESSAGE  = 2;
17
-    const CHILDREN = 3;
18
-
19
-    protected $errors;
20
-    protected $_current;
21
-    protected $_stacks = array(array());
22
-    protected $locale;
23
-    protected $generator;
24
-    protected $context;
25
-
26
-    protected $lines = array();
27
-
28
-    public function __construct($context) {
29
-        $this->locale    =& $context->get('Locale');
30
-        $this->context   = $context;
31
-        $this->_current  =& $this->_stacks[0];
32
-        $this->errors    =& $this->_stacks[0];
33
-    }
34
-
35
-    /**
36
-     * Sends an error message to the collector for later use
37
-     * @param $severity int Error severity, PHP error style (don't use E_USER_)
38
-     * @param $msg string Error message text
39
-     * @param $subst1 string First substitution for $msg
40
-     * @param $subst2 string ...
41
-     */
42
-    public function send($severity, $msg) {
43
-
44
-        $args = array();
45
-        if (func_num_args() > 2) {
46
-            $args = func_get_args();
47
-            array_shift($args);
48
-            unset($args[0]);
49
-        }
50
-
51
-        $token = $this->context->get('CurrentToken', true);
52
-        $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
53
-        $col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
54
-        $attr  = $this->context->get('CurrentAttr', true);
55
-
56
-        // perform special substitutions, also add custom parameters
57
-        $subst = array();
58
-        if (!is_null($token)) {
59
-            $args['CurrentToken'] = $token;
60
-        }
61
-        if (!is_null($attr)) {
62
-            $subst['$CurrentAttr.Name'] = $attr;
63
-            if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
64
-        }
65
-
66
-        if (empty($args)) {
67
-            $msg = $this->locale->getMessage($msg);
68
-        } else {
69
-            $msg = $this->locale->formatMessage($msg, $args);
70
-        }
71
-
72
-        if (!empty($subst)) $msg = strtr($msg, $subst);
73
-
74
-        // (numerically indexed)
75
-        $error = array(
76
-            self::LINENO   => $line,
77
-            self::SEVERITY => $severity,
78
-            self::MESSAGE  => $msg,
79
-            self::CHILDREN => array()
80
-        );
81
-        $this->_current[] = $error;
82
-
83
-
84
-        // NEW CODE BELOW ...
85
-
86
-        $struct = null;
87
-        // Top-level errors are either:
88
-        //  TOKEN type, if $value is set appropriately, or
89
-        //  "syntax" type, if $value is null
90
-        $new_struct = new HTMLPurifier_ErrorStruct();
91
-        $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92
-        if ($token) $new_struct->value = clone $token;
93
-        if (is_int($line) && is_int($col)) {
94
-            if (isset($this->lines[$line][$col])) {
95
-                $struct = $this->lines[$line][$col];
96
-            } else {
97
-                $struct = $this->lines[$line][$col] = $new_struct;
98
-            }
99
-            // These ksorts may present a performance problem
100
-            ksort($this->lines[$line], SORT_NUMERIC);
101
-        } else {
102
-            if (isset($this->lines[-1])) {
103
-                $struct = $this->lines[-1];
104
-            } else {
105
-                $struct = $this->lines[-1] = $new_struct;
106
-            }
107
-        }
108
-        ksort($this->lines, SORT_NUMERIC);
109
-
110
-        // Now, check if we need to operate on a lower structure
111
-        if (!empty($attr)) {
112
-            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
113
-            if (!$struct->value) {
114
-                $struct->value = array($attr, 'PUT VALUE HERE');
115
-            }
116
-        }
117
-        if (!empty($cssprop)) {
118
-            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
119
-            if (!$struct->value) {
120
-                // if we tokenize CSS this might be a little more difficult to do
121
-                $struct->value = array($cssprop, 'PUT VALUE HERE');
122
-            }
123
-        }
124
-
125
-        // Ok, structs are all setup, now time to register the error
126
-        $struct->addError($severity, $msg);
127
-    }
128
-
129
-    /**
130
-     * Retrieves raw error data for custom formatter to use
131
-     * @param List of arrays in format of array(line of error,
132
-     *        error severity, error message,
133
-     *        recursive sub-errors array)
134
-     */
135
-    public function getRaw() {
136
-        return $this->errors;
137
-    }
138
-
139
-    /**
140
-     * Default HTML formatting implementation for error messages
141
-     * @param $config Configuration array, vital for HTML output nature
142
-     * @param $errors Errors array to display; used for recursion.
143
-     */
144
-    public function getHTMLFormatted($config, $errors = null) {
145
-        $ret = array();
146
-
147
-        $this->generator = new HTMLPurifier_Generator($config, $this->context);
148
-        if ($errors === null) $errors = $this->errors;
149
-
150
-        // 'At line' message needs to be removed
151
-
152
-        // generation code for new structure goes here. It needs to be recursive.
153
-        foreach ($this->lines as $line => $col_array) {
154
-            if ($line == -1) continue;
155
-            foreach ($col_array as $col => $struct) {
156
-                $this->_renderStruct($ret, $struct, $line, $col);
157
-            }
158
-        }
159
-        if (isset($this->lines[-1])) {
160
-            $this->_renderStruct($ret, $this->lines[-1]);
161
-        }
162
-
163
-        if (empty($errors)) {
164
-            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
165
-        } else {
166
-            return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
167
-        }
168
-
169
-    }
170
-
171
-    private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
172
-        $stack = array($struct);
173
-        $context_stack = array(array());
174
-        while ($current = array_pop($stack)) {
175
-            $context = array_pop($context_stack);
176
-            foreach ($current->errors as $error) {
177
-                list($severity, $msg) = $error;
178
-                $string = '';
179
-                $string .= '<div>';
180
-                // W3C uses an icon to indicate the severity of the error.
181
-                $error = $this->locale->getErrorName($severity);
182
-                $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
183
-                if (!is_null($line) && !is_null($col)) {
184
-                    $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
185
-                } else {
186
-                    $string .= '<em class="location">End of Document: </em> ';
187
-                }
188
-                $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
189
-                $string .= '</div>';
190
-                // Here, have a marker for the character on the column appropriate.
191
-                // Be sure to clip extremely long lines.
192
-                //$string .= '<pre>';
193
-                //$string .= '';
194
-                //$string .= '</pre>';
195
-                $ret[] = $string;
196
-            }
197
-            foreach ($current->children as $type => $array) {
198
-                $context[] = $current;
199
-                $stack = array_merge($stack, array_reverse($array, true));
200
-                for ($i = count($array); $i > 0; $i--) {
201
-                    $context_stack[] = $context;
202
-                }
203
-            }
204
-        }
205
-    }
10
+	/**
11
+	 * Identifiers for the returned error array. These are purposely numeric
12
+	 * so list() can be used.
13
+	 */
14
+	const LINENO   = 0;
15
+	const SEVERITY = 1;
16
+	const MESSAGE  = 2;
17
+	const CHILDREN = 3;
18
+
19
+	protected $errors;
20
+	protected $_current;
21
+	protected $_stacks = array(array());
22
+	protected $locale;
23
+	protected $generator;
24
+	protected $context;
25
+
26
+	protected $lines = array();
27
+
28
+	public function __construct($context) {
29
+		$this->locale    =& $context->get('Locale');
30
+		$this->context   = $context;
31
+		$this->_current  =& $this->_stacks[0];
32
+		$this->errors    =& $this->_stacks[0];
33
+	}
34
+
35
+	/**
36
+	 * Sends an error message to the collector for later use
37
+	 * @param $severity int Error severity, PHP error style (don't use E_USER_)
38
+	 * @param $msg string Error message text
39
+	 * @param $subst1 string First substitution for $msg
40
+	 * @param $subst2 string ...
41
+	 */
42
+	public function send($severity, $msg) {
43
+
44
+		$args = array();
45
+		if (func_num_args() > 2) {
46
+			$args = func_get_args();
47
+			array_shift($args);
48
+			unset($args[0]);
49
+		}
50
+
51
+		$token = $this->context->get('CurrentToken', true);
52
+		$line  = $token ? $token->line : $this->context->get('CurrentLine', true);
53
+		$col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
54
+		$attr  = $this->context->get('CurrentAttr', true);
55
+
56
+		// perform special substitutions, also add custom parameters
57
+		$subst = array();
58
+		if (!is_null($token)) {
59
+			$args['CurrentToken'] = $token;
60
+		}
61
+		if (!is_null($attr)) {
62
+			$subst['$CurrentAttr.Name'] = $attr;
63
+			if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
64
+		}
65
+
66
+		if (empty($args)) {
67
+			$msg = $this->locale->getMessage($msg);
68
+		} else {
69
+			$msg = $this->locale->formatMessage($msg, $args);
70
+		}
71
+
72
+		if (!empty($subst)) $msg = strtr($msg, $subst);
73
+
74
+		// (numerically indexed)
75
+		$error = array(
76
+			self::LINENO   => $line,
77
+			self::SEVERITY => $severity,
78
+			self::MESSAGE  => $msg,
79
+			self::CHILDREN => array()
80
+		);
81
+		$this->_current[] = $error;
82
+
83
+
84
+		// NEW CODE BELOW ...
85
+
86
+		$struct = null;
87
+		// Top-level errors are either:
88
+		//  TOKEN type, if $value is set appropriately, or
89
+		//  "syntax" type, if $value is null
90
+		$new_struct = new HTMLPurifier_ErrorStruct();
91
+		$new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92
+		if ($token) $new_struct->value = clone $token;
93
+		if (is_int($line) && is_int($col)) {
94
+			if (isset($this->lines[$line][$col])) {
95
+				$struct = $this->lines[$line][$col];
96
+			} else {
97
+				$struct = $this->lines[$line][$col] = $new_struct;
98
+			}
99
+			// These ksorts may present a performance problem
100
+			ksort($this->lines[$line], SORT_NUMERIC);
101
+		} else {
102
+			if (isset($this->lines[-1])) {
103
+				$struct = $this->lines[-1];
104
+			} else {
105
+				$struct = $this->lines[-1] = $new_struct;
106
+			}
107
+		}
108
+		ksort($this->lines, SORT_NUMERIC);
109
+
110
+		// Now, check if we need to operate on a lower structure
111
+		if (!empty($attr)) {
112
+			$struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
113
+			if (!$struct->value) {
114
+				$struct->value = array($attr, 'PUT VALUE HERE');
115
+			}
116
+		}
117
+		if (!empty($cssprop)) {
118
+			$struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
119
+			if (!$struct->value) {
120
+				// if we tokenize CSS this might be a little more difficult to do
121
+				$struct->value = array($cssprop, 'PUT VALUE HERE');
122
+			}
123
+		}
124
+
125
+		// Ok, structs are all setup, now time to register the error
126
+		$struct->addError($severity, $msg);
127
+	}
128
+
129
+	/**
130
+	 * Retrieves raw error data for custom formatter to use
131
+	 * @param List of arrays in format of array(line of error,
132
+	 *        error severity, error message,
133
+	 *        recursive sub-errors array)
134
+	 */
135
+	public function getRaw() {
136
+		return $this->errors;
137
+	}
138
+
139
+	/**
140
+	 * Default HTML formatting implementation for error messages
141
+	 * @param $config Configuration array, vital for HTML output nature
142
+	 * @param $errors Errors array to display; used for recursion.
143
+	 */
144
+	public function getHTMLFormatted($config, $errors = null) {
145
+		$ret = array();
146
+
147
+		$this->generator = new HTMLPurifier_Generator($config, $this->context);
148
+		if ($errors === null) $errors = $this->errors;
149
+
150
+		// 'At line' message needs to be removed
151
+
152
+		// generation code for new structure goes here. It needs to be recursive.
153
+		foreach ($this->lines as $line => $col_array) {
154
+			if ($line == -1) continue;
155
+			foreach ($col_array as $col => $struct) {
156
+				$this->_renderStruct($ret, $struct, $line, $col);
157
+			}
158
+		}
159
+		if (isset($this->lines[-1])) {
160
+			$this->_renderStruct($ret, $this->lines[-1]);
161
+		}
162
+
163
+		if (empty($errors)) {
164
+			return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
165
+		} else {
166
+			return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
167
+		}
168
+
169
+	}
170
+
171
+	private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
172
+		$stack = array($struct);
173
+		$context_stack = array(array());
174
+		while ($current = array_pop($stack)) {
175
+			$context = array_pop($context_stack);
176
+			foreach ($current->errors as $error) {
177
+				list($severity, $msg) = $error;
178
+				$string = '';
179
+				$string .= '<div>';
180
+				// W3C uses an icon to indicate the severity of the error.
181
+				$error = $this->locale->getErrorName($severity);
182
+				$string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
183
+				if (!is_null($line) && !is_null($col)) {
184
+					$string .= "<em class=\"location\">Line $line, Column $col: </em> ";
185
+				} else {
186
+					$string .= '<em class="location">End of Document: </em> ';
187
+				}
188
+				$string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
189
+				$string .= '</div>';
190
+				// Here, have a marker for the character on the column appropriate.
191
+				// Be sure to clip extremely long lines.
192
+				//$string .= '<pre>';
193
+				//$string .= '';
194
+				//$string .= '</pre>';
195
+				$ret[] = $string;
196
+			}
197
+			foreach ($current->children as $type => $array) {
198
+				$context[] = $current;
199
+				$stack = array_merge($stack, array_reverse($array, true));
200
+				for ($i = count($array); $i > 0; $i--) {
201
+					$context_stack[] = $context;
202
+				}
203
+			}
204
+		}
205
+	}
206 206
 
207 207
 }
208 208
 
Please login to merge, or discard this patch.
Braces   +15 added lines, -5 removed lines patch added patch discarded remove patch
@@ -60,7 +60,9 @@  discard block
 block discarded – undo
60 60
         }
61 61
         if (!is_null($attr)) {
62 62
             $subst['$CurrentAttr.Name'] = $attr;
63
-            if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
63
+            if (isset($token->attr[$attr])) {
64
+            	$subst['$CurrentAttr.Value'] = $token->attr[$attr];
65
+            }
64 66
         }
65 67
 
66 68
         if (empty($args)) {
@@ -69,7 +71,9 @@  discard block
 block discarded – undo
69 71
             $msg = $this->locale->formatMessage($msg, $args);
70 72
         }
71 73
 
72
-        if (!empty($subst)) $msg = strtr($msg, $subst);
74
+        if (!empty($subst)) {
75
+        	$msg = strtr($msg, $subst);
76
+        }
73 77
 
74 78
         // (numerically indexed)
75 79
         $error = array(
@@ -89,7 +93,9 @@  discard block
 block discarded – undo
89 93
         //  "syntax" type, if $value is null
90 94
         $new_struct = new HTMLPurifier_ErrorStruct();
91 95
         $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92
-        if ($token) $new_struct->value = clone $token;
96
+        if ($token) {
97
+        	$new_struct->value = clone $token;
98
+        }
93 99
         if (is_int($line) && is_int($col)) {
94 100
             if (isset($this->lines[$line][$col])) {
95 101
                 $struct = $this->lines[$line][$col];
@@ -145,13 +151,17 @@  discard block
 block discarded – undo
145 151
         $ret = array();
146 152
 
147 153
         $this->generator = new HTMLPurifier_Generator($config, $this->context);
148
-        if ($errors === null) $errors = $this->errors;
154
+        if ($errors === null) {
155
+        	$errors = $this->errors;
156
+        }
149 157
 
150 158
         // 'At line' message needs to be removed
151 159
 
152 160
         // generation code for new structure goes here. It needs to be recursive.
153 161
         foreach ($this->lines as $line => $col_array) {
154
-            if ($line == -1) continue;
162
+            if ($line == -1) {
163
+            	continue;
164
+            }
155 165
             foreach ($col_array as $col => $struct) {
156 166
                 $this->_renderStruct($ret, $struct, $line, $col);
157 167
             }
Please login to merge, or discard this patch.
Spacing   +7 added lines, -7 removed lines patch added patch discarded remove patch
@@ -26,10 +26,10 @@  discard block
 block discarded – undo
26 26
     protected $lines = array();
27 27
 
28 28
     public function __construct($context) {
29
-        $this->locale    =& $context->get('Locale');
29
+        $this->locale    = & $context->get('Locale');
30 30
         $this->context   = $context;
31
-        $this->_current  =& $this->_stacks[0];
32
-        $this->errors    =& $this->_stacks[0];
31
+        $this->_current  = & $this->_stacks[0];
32
+        $this->errors    = & $this->_stacks[0];
33 33
     }
34 34
 
35 35
     /**
@@ -50,7 +50,7 @@  discard block
 block discarded – undo
50 50
 
51 51
         $token = $this->context->get('CurrentToken', true);
52 52
         $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
53
-        $col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
53
+        $col   = $token ? $token->col : $this->context->get('CurrentCol', true);
54 54
         $attr  = $this->context->get('CurrentAttr', true);
55 55
 
56 56
         // perform special substitutions, also add custom parameters
@@ -161,9 +161,9 @@  discard block
 block discarded – undo
161 161
         }
162 162
 
163 163
         if (empty($errors)) {
164
-            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
164
+            return '<p>'.$this->locale->getMessage('ErrorCollector: No errors').'</p>';
165 165
         } else {
166
-            return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
166
+            return '<ul><li>'.implode('</li><li>', $ret).'</li></ul>';
167 167
         }
168 168
 
169 169
     }
@@ -185,7 +185,7 @@  discard block
 block discarded – undo
185 185
                 } else {
186 186
                     $string .= '<em class="location">End of Document: </em> ';
187 187
                 }
188
-                $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
188
+                $string .= '<strong class="description">'.$this->generator->escape($msg).'</strong> ';
189 189
                 $string .= '</div>';
190 190
                 // Here, have a marker for the character on the column appropriate.
191 191
                 // Be sure to clip extremely long lines.
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/Generator.php 4 patches
Doc Comments   +4 added lines, -4 removed lines patch added patch discarded remove patch
@@ -70,7 +70,7 @@  discard block
 block discarded – undo
70 70
      * Generates HTML from an array of tokens.
71 71
      * @param $tokens Array of HTMLPurifier_Token
72 72
      * @param $config HTMLPurifier_Config object
73
-     * @return Generated HTML
73
+     * @return string HTML
74 74
      */
75 75
     public function generateFromTokens($tokens) {
76 76
         if (!$tokens) return '';
@@ -115,7 +115,7 @@  discard block
 block discarded – undo
115 115
     /**
116 116
      * Generates HTML from a single token.
117 117
      * @param $token HTMLPurifier_Token object.
118
-     * @return Generated HTML
118
+     * @return string HTML
119 119
      */
120 120
     public function generateFromToken($token) {
121 121
         if (!$token instanceof HTMLPurifier_Token) {
@@ -181,7 +181,7 @@  discard block
 block discarded – undo
181 181
      * @param $assoc_array_of_attributes Attribute array
182 182
      * @param $element Name of element attributes are for, used to check
183 183
      *        attribute minimization.
184
-     * @return Generate HTML fragment for insertion.
184
+     * @return string HTML fragment for insertion.
185 185
      */
186 186
     public function generateAttributes($assoc_array_of_attributes, $element = false) {
187 187
         $html = '';
@@ -238,7 +238,7 @@  discard block
 block discarded – undo
238 238
      *       for properly generating HTML here w/o using tokens, it stays
239 239
      *       public.
240 240
      * @param $string String data to escape for HTML.
241
-     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
241
+     * @param integer $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242 242
      *               permissible for non-attribute output.
243 243
      * @return String escaped data.
244 244
      */
Please login to merge, or discard this patch.
Indentation   +238 added lines, -238 removed lines patch added patch discarded remove patch
@@ -10,244 +10,244 @@
 block discarded – undo
10 10
 class HTMLPurifier_Generator
11 11
 {
12 12
 
13
-    /**
14
-     * Whether or not generator should produce XML output
15
-     */
16
-    private $_xhtml = true;
17
-
18
-    /**
19
-     * :HACK: Whether or not generator should comment the insides of <script> tags
20
-     */
21
-    private $_scriptFix = false;
22
-
23
-    /**
24
-     * Cache of HTMLDefinition during HTML output to determine whether or
25
-     * not attributes should be minimized.
26
-     */
27
-    private $_def;
28
-
29
-    /**
30
-     * Cache of %Output.SortAttr
31
-     */
32
-    private $_sortAttr;
33
-
34
-    /**
35
-     * Cache of %Output.FlashCompat
36
-     */
37
-    private $_flashCompat;
38
-
39
-    /**
40
-     * Cache of %Output.FixInnerHTML
41
-     */
42
-    private $_innerHTMLFix;
43
-
44
-    /**
45
-     * Stack for keeping track of object information when outputting IE
46
-     * compatibility code.
47
-     */
48
-    private $_flashStack = array();
49
-
50
-    /**
51
-     * Configuration for the generator
52
-     */
53
-    protected $config;
54
-
55
-    /**
56
-     * @param $config Instance of HTMLPurifier_Config
57
-     * @param $context Instance of HTMLPurifier_Context
58
-     */
59
-    public function __construct($config, $context) {
60
-        $this->config = $config;
61
-        $this->_scriptFix = $config->get('Output.CommentScriptContents');
62
-        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
63
-        $this->_sortAttr = $config->get('Output.SortAttr');
64
-        $this->_flashCompat = $config->get('Output.FlashCompat');
65
-        $this->_def = $config->getHTMLDefinition();
66
-        $this->_xhtml = $this->_def->doctype->xml;
67
-    }
68
-
69
-    /**
70
-     * Generates HTML from an array of tokens.
71
-     * @param $tokens Array of HTMLPurifier_Token
72
-     * @param $config HTMLPurifier_Config object
73
-     * @return Generated HTML
74
-     */
75
-    public function generateFromTokens($tokens) {
76
-        if (!$tokens) return '';
77
-
78
-        // Basic algorithm
79
-        $html = '';
80
-        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81
-            if ($this->_scriptFix && $tokens[$i]->name === 'script'
82
-                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
83
-                // script special case
84
-                // the contents of the script block must be ONE token
85
-                // for this to work.
86
-                $html .= $this->generateFromToken($tokens[$i++]);
87
-                $html .= $this->generateScriptFromToken($tokens[$i++]);
88
-            }
89
-            $html .= $this->generateFromToken($tokens[$i]);
90
-        }
91
-
92
-        // Tidy cleanup
93
-        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
94
-            $tidy = new Tidy;
95
-            $tidy->parseString($html, array(
96
-               'indent'=> true,
97
-               'output-xhtml' => $this->_xhtml,
98
-               'show-body-only' => true,
99
-               'indent-spaces' => 2,
100
-               'wrap' => 68,
101
-            ), 'utf8');
102
-            $tidy->cleanRepair();
103
-            $html = (string) $tidy; // explicit cast necessary
104
-        }
105
-
106
-        // Normalize newlines to system defined value
107
-        if ($this->config->get('Core.NormalizeNewlines')) {
108
-            $nl = $this->config->get('Output.Newline');
109
-            if ($nl === null) $nl = PHP_EOL;
110
-            if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
111
-        }
112
-        return $html;
113
-    }
114
-
115
-    /**
116
-     * Generates HTML from a single token.
117
-     * @param $token HTMLPurifier_Token object.
118
-     * @return Generated HTML
119
-     */
120
-    public function generateFromToken($token) {
121
-        if (!$token instanceof HTMLPurifier_Token) {
122
-            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
123
-            return '';
124
-
125
-        } elseif ($token instanceof HTMLPurifier_Token_Start) {
126
-            $attr = $this->generateAttributes($token->attr, $token->name);
127
-            if ($this->_flashCompat) {
128
-                if ($token->name == "object") {
129
-                    $flash = new stdclass();
130
-                    $flash->attr = $token->attr;
131
-                    $flash->param = array();
132
-                    $this->_flashStack[] = $flash;
133
-                }
134
-            }
135
-            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
136
-
137
-        } elseif ($token instanceof HTMLPurifier_Token_End) {
138
-            $_extra = '';
139
-            if ($this->_flashCompat) {
140
-                if ($token->name == "object" && !empty($this->_flashStack)) {
141
-                    // doesn't do anything for now
142
-                }
143
-            }
144
-            return $_extra . '</' . $token->name . '>';
145
-
146
-        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
147
-            if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148
-                $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
149
-            }
150
-            $attr = $this->generateAttributes($token->attr, $token->name);
151
-             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152
-                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
153
-                . '>';
154
-
155
-        } elseif ($token instanceof HTMLPurifier_Token_Text) {
156
-            return $this->escape($token->data, ENT_NOQUOTES);
157
-
158
-        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
159
-            return '<!--' . $token->data . '-->';
160
-        } else {
161
-            return '';
162
-
163
-        }
164
-    }
165
-
166
-    /**
167
-     * Special case processor for the contents of script tags
168
-     * @warning This runs into problems if there's already a literal
169
-     *          --> somewhere inside the script contents.
170
-     */
171
-    public function generateScriptFromToken($token) {
172
-        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173
-        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
174
-        $data = preg_replace('#//\s*$#', '', $token->data);
175
-        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
176
-    }
177
-
178
-    /**
179
-     * Generates attribute declarations from attribute array.
180
-     * @note This does not include the leading or trailing space.
181
-     * @param $assoc_array_of_attributes Attribute array
182
-     * @param $element Name of element attributes are for, used to check
183
-     *        attribute minimization.
184
-     * @return Generate HTML fragment for insertion.
185
-     */
186
-    public function generateAttributes($assoc_array_of_attributes, $element = false) {
187
-        $html = '';
188
-        if ($this->_sortAttr) ksort($assoc_array_of_attributes);
189
-        foreach ($assoc_array_of_attributes as $key => $value) {
190
-            if (!$this->_xhtml) {
191
-                // Remove namespaced attributes
192
-                if (strpos($key, ':') !== false) continue;
193
-                // Check if we should minimize the attribute: val="val" -> val
194
-                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195
-                    $html .= $key . ' ';
196
-                    continue;
197
-                }
198
-            }
199
-            // Workaround for Internet Explorer innerHTML bug.
200
-            // Essentially, Internet Explorer, when calculating
201
-            // innerHTML, omits quotes if there are no instances of
202
-            // angled brackets, quotes or spaces.  However, when parsing
203
-            // HTML (for example, when you assign to innerHTML), it
204
-            // treats backticks as quotes.  Thus,
205
-            //      <img alt="``" />
206
-            // becomes
207
-            //      <img alt=`` />
208
-            // becomes
209
-            //      <img alt='' />
210
-            // Fortunately, all we need to do is trigger an appropriate
211
-            // quoting style, which we do by adding an extra space.
212
-            // This also is consistent with the W3C spec, which states
213
-            // that user agents may ignore leading or trailing
214
-            // whitespace (in fact, most don't, at least for attributes
215
-            // like alt, but an extra space at the end is barely
216
-            // noticeable).  Still, we have a configuration knob for
217
-            // this, since this transformation is not necesary if you
218
-            // don't process user input with innerHTML or you don't plan
219
-            // on supporting Internet Explorer.
220
-            if ($this->_innerHTMLFix) {
221
-                if (strpos($value, '`') !== false) {
222
-                    // check if correct quoting style would not already be
223
-                    // triggered
224
-                    if (strcspn($value, '"\' <>') === strlen($value)) {
225
-                        // protect!
226
-                        $value .= ' ';
227
-                    }
228
-                }
229
-            }
230
-            $html .= $key.'="'.$this->escape($value).'" ';
231
-        }
232
-        return rtrim($html);
233
-    }
234
-
235
-    /**
236
-     * Escapes raw text data.
237
-     * @todo This really ought to be protected, but until we have a facility
238
-     *       for properly generating HTML here w/o using tokens, it stays
239
-     *       public.
240
-     * @param $string String data to escape for HTML.
241
-     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242
-     *               permissible for non-attribute output.
243
-     * @return String escaped data.
244
-     */
245
-    public function escape($string, $quote = null) {
246
-        // Workaround for APC bug on Mac Leopard reported by sidepodcast
247
-        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
248
-        if ($quote === null) $quote = ENT_COMPAT;
249
-        return htmlspecialchars($string, $quote, 'UTF-8', false);
250
-    }
13
+	/**
14
+	 * Whether or not generator should produce XML output
15
+	 */
16
+	private $_xhtml = true;
17
+
18
+	/**
19
+	 * :HACK: Whether or not generator should comment the insides of <script> tags
20
+	 */
21
+	private $_scriptFix = false;
22
+
23
+	/**
24
+	 * Cache of HTMLDefinition during HTML output to determine whether or
25
+	 * not attributes should be minimized.
26
+	 */
27
+	private $_def;
28
+
29
+	/**
30
+	 * Cache of %Output.SortAttr
31
+	 */
32
+	private $_sortAttr;
33
+
34
+	/**
35
+	 * Cache of %Output.FlashCompat
36
+	 */
37
+	private $_flashCompat;
38
+
39
+	/**
40
+	 * Cache of %Output.FixInnerHTML
41
+	 */
42
+	private $_innerHTMLFix;
43
+
44
+	/**
45
+	 * Stack for keeping track of object information when outputting IE
46
+	 * compatibility code.
47
+	 */
48
+	private $_flashStack = array();
49
+
50
+	/**
51
+	 * Configuration for the generator
52
+	 */
53
+	protected $config;
54
+
55
+	/**
56
+	 * @param $config Instance of HTMLPurifier_Config
57
+	 * @param $context Instance of HTMLPurifier_Context
58
+	 */
59
+	public function __construct($config, $context) {
60
+		$this->config = $config;
61
+		$this->_scriptFix = $config->get('Output.CommentScriptContents');
62
+		$this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
63
+		$this->_sortAttr = $config->get('Output.SortAttr');
64
+		$this->_flashCompat = $config->get('Output.FlashCompat');
65
+		$this->_def = $config->getHTMLDefinition();
66
+		$this->_xhtml = $this->_def->doctype->xml;
67
+	}
68
+
69
+	/**
70
+	 * Generates HTML from an array of tokens.
71
+	 * @param $tokens Array of HTMLPurifier_Token
72
+	 * @param $config HTMLPurifier_Config object
73
+	 * @return Generated HTML
74
+	 */
75
+	public function generateFromTokens($tokens) {
76
+		if (!$tokens) return '';
77
+
78
+		// Basic algorithm
79
+		$html = '';
80
+		for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81
+			if ($this->_scriptFix && $tokens[$i]->name === 'script'
82
+				&& $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
83
+				// script special case
84
+				// the contents of the script block must be ONE token
85
+				// for this to work.
86
+				$html .= $this->generateFromToken($tokens[$i++]);
87
+				$html .= $this->generateScriptFromToken($tokens[$i++]);
88
+			}
89
+			$html .= $this->generateFromToken($tokens[$i]);
90
+		}
91
+
92
+		// Tidy cleanup
93
+		if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
94
+			$tidy = new Tidy;
95
+			$tidy->parseString($html, array(
96
+			   'indent'=> true,
97
+			   'output-xhtml' => $this->_xhtml,
98
+			   'show-body-only' => true,
99
+			   'indent-spaces' => 2,
100
+			   'wrap' => 68,
101
+			), 'utf8');
102
+			$tidy->cleanRepair();
103
+			$html = (string) $tidy; // explicit cast necessary
104
+		}
105
+
106
+		// Normalize newlines to system defined value
107
+		if ($this->config->get('Core.NormalizeNewlines')) {
108
+			$nl = $this->config->get('Output.Newline');
109
+			if ($nl === null) $nl = PHP_EOL;
110
+			if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
111
+		}
112
+		return $html;
113
+	}
114
+
115
+	/**
116
+	 * Generates HTML from a single token.
117
+	 * @param $token HTMLPurifier_Token object.
118
+	 * @return Generated HTML
119
+	 */
120
+	public function generateFromToken($token) {
121
+		if (!$token instanceof HTMLPurifier_Token) {
122
+			trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
123
+			return '';
124
+
125
+		} elseif ($token instanceof HTMLPurifier_Token_Start) {
126
+			$attr = $this->generateAttributes($token->attr, $token->name);
127
+			if ($this->_flashCompat) {
128
+				if ($token->name == "object") {
129
+					$flash = new stdclass();
130
+					$flash->attr = $token->attr;
131
+					$flash->param = array();
132
+					$this->_flashStack[] = $flash;
133
+				}
134
+			}
135
+			return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
136
+
137
+		} elseif ($token instanceof HTMLPurifier_Token_End) {
138
+			$_extra = '';
139
+			if ($this->_flashCompat) {
140
+				if ($token->name == "object" && !empty($this->_flashStack)) {
141
+					// doesn't do anything for now
142
+				}
143
+			}
144
+			return $_extra . '</' . $token->name . '>';
145
+
146
+		} elseif ($token instanceof HTMLPurifier_Token_Empty) {
147
+			if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148
+				$this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
149
+			}
150
+			$attr = $this->generateAttributes($token->attr, $token->name);
151
+			 return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152
+				( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
153
+				. '>';
154
+
155
+		} elseif ($token instanceof HTMLPurifier_Token_Text) {
156
+			return $this->escape($token->data, ENT_NOQUOTES);
157
+
158
+		} elseif ($token instanceof HTMLPurifier_Token_Comment) {
159
+			return '<!--' . $token->data . '-->';
160
+		} else {
161
+			return '';
162
+
163
+		}
164
+	}
165
+
166
+	/**
167
+	 * Special case processor for the contents of script tags
168
+	 * @warning This runs into problems if there's already a literal
169
+	 *          --> somewhere inside the script contents.
170
+	 */
171
+	public function generateScriptFromToken($token) {
172
+		if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173
+		// Thanks <http://lachy.id.au/log/2005/05/script-comments>
174
+		$data = preg_replace('#//\s*$#', '', $token->data);
175
+		return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
176
+	}
177
+
178
+	/**
179
+	 * Generates attribute declarations from attribute array.
180
+	 * @note This does not include the leading or trailing space.
181
+	 * @param $assoc_array_of_attributes Attribute array
182
+	 * @param $element Name of element attributes are for, used to check
183
+	 *        attribute minimization.
184
+	 * @return Generate HTML fragment for insertion.
185
+	 */
186
+	public function generateAttributes($assoc_array_of_attributes, $element = false) {
187
+		$html = '';
188
+		if ($this->_sortAttr) ksort($assoc_array_of_attributes);
189
+		foreach ($assoc_array_of_attributes as $key => $value) {
190
+			if (!$this->_xhtml) {
191
+				// Remove namespaced attributes
192
+				if (strpos($key, ':') !== false) continue;
193
+				// Check if we should minimize the attribute: val="val" -> val
194
+				if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195
+					$html .= $key . ' ';
196
+					continue;
197
+				}
198
+			}
199
+			// Workaround for Internet Explorer innerHTML bug.
200
+			// Essentially, Internet Explorer, when calculating
201
+			// innerHTML, omits quotes if there are no instances of
202
+			// angled brackets, quotes or spaces.  However, when parsing
203
+			// HTML (for example, when you assign to innerHTML), it
204
+			// treats backticks as quotes.  Thus,
205
+			//      <img alt="``" />
206
+			// becomes
207
+			//      <img alt=`` />
208
+			// becomes
209
+			//      <img alt='' />
210
+			// Fortunately, all we need to do is trigger an appropriate
211
+			// quoting style, which we do by adding an extra space.
212
+			// This also is consistent with the W3C spec, which states
213
+			// that user agents may ignore leading or trailing
214
+			// whitespace (in fact, most don't, at least for attributes
215
+			// like alt, but an extra space at the end is barely
216
+			// noticeable).  Still, we have a configuration knob for
217
+			// this, since this transformation is not necesary if you
218
+			// don't process user input with innerHTML or you don't plan
219
+			// on supporting Internet Explorer.
220
+			if ($this->_innerHTMLFix) {
221
+				if (strpos($value, '`') !== false) {
222
+					// check if correct quoting style would not already be
223
+					// triggered
224
+					if (strcspn($value, '"\' <>') === strlen($value)) {
225
+						// protect!
226
+						$value .= ' ';
227
+					}
228
+				}
229
+			}
230
+			$html .= $key.'="'.$this->escape($value).'" ';
231
+		}
232
+		return rtrim($html);
233
+	}
234
+
235
+	/**
236
+	 * Escapes raw text data.
237
+	 * @todo This really ought to be protected, but until we have a facility
238
+	 *       for properly generating HTML here w/o using tokens, it stays
239
+	 *       public.
240
+	 * @param $string String data to escape for HTML.
241
+	 * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242
+	 *               permissible for non-attribute output.
243
+	 * @return String escaped data.
244
+	 */
245
+	public function escape($string, $quote = null) {
246
+		// Workaround for APC bug on Mac Leopard reported by sidepodcast
247
+		// http://htmlpurifier.org/phorum/read.php?3,4823,4846
248
+		if ($quote === null) $quote = ENT_COMPAT;
249
+		return htmlspecialchars($string, $quote, 'UTF-8', false);
250
+	}
251 251
 
252 252
 }
253 253
 
Please login to merge, or discard this patch.
Braces   +21 added lines, -7 removed lines patch added patch discarded remove patch
@@ -73,7 +73,9 @@  discard block
 block discarded – undo
73 73
      * @return Generated HTML
74 74
      */
75 75
     public function generateFromTokens($tokens) {
76
-        if (!$tokens) return '';
76
+        if (!$tokens) {
77
+        	return '';
78
+        }
77 79
 
78 80
         // Basic algorithm
79 81
         $html = '';
@@ -106,8 +108,12 @@  discard block
 block discarded – undo
106 108
         // Normalize newlines to system defined value
107 109
         if ($this->config->get('Core.NormalizeNewlines')) {
108 110
             $nl = $this->config->get('Output.Newline');
109
-            if ($nl === null) $nl = PHP_EOL;
110
-            if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
111
+            if ($nl === null) {
112
+            	$nl = PHP_EOL;
113
+            }
114
+            if ($nl !== "\n") {
115
+            	$html = str_replace("\n", $nl, $html);
116
+            }
111 117
         }
112 118
         return $html;
113 119
     }
@@ -169,7 +175,9 @@  discard block
 block discarded – undo
169 175
      *          --> somewhere inside the script contents.
170 176
      */
171 177
     public function generateScriptFromToken($token) {
172
-        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
178
+        if (!$token instanceof HTMLPurifier_Token_Text) {
179
+        	return $this->generateFromToken($token);
180
+        }
173 181
         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
174 182
         $data = preg_replace('#//\s*$#', '', $token->data);
175 183
         return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
@@ -185,11 +193,15 @@  discard block
 block discarded – undo
185 193
      */
186 194
     public function generateAttributes($assoc_array_of_attributes, $element = false) {
187 195
         $html = '';
188
-        if ($this->_sortAttr) ksort($assoc_array_of_attributes);
196
+        if ($this->_sortAttr) {
197
+        	ksort($assoc_array_of_attributes);
198
+        }
189 199
         foreach ($assoc_array_of_attributes as $key => $value) {
190 200
             if (!$this->_xhtml) {
191 201
                 // Remove namespaced attributes
192
-                if (strpos($key, ':') !== false) continue;
202
+                if (strpos($key, ':') !== false) {
203
+                	continue;
204
+                }
193 205
                 // Check if we should minimize the attribute: val="val" -> val
194 206
                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195 207
                     $html .= $key . ' ';
@@ -245,7 +257,9 @@  discard block
 block discarded – undo
245 257
     public function escape($string, $quote = null) {
246 258
         // Workaround for APC bug on Mac Leopard reported by sidepodcast
247 259
         // http://htmlpurifier.org/phorum/read.php?3,4823,4846
248
-        if ($quote === null) $quote = ENT_COMPAT;
260
+        if ($quote === null) {
261
+        	$quote = ENT_COMPAT;
262
+        }
249 263
         return htmlspecialchars($string, $quote, 'UTF-8', false);
250 264
     }
251 265
 
Please login to merge, or discard this patch.
Spacing   +9 added lines, -9 removed lines patch added patch discarded remove patch
@@ -79,7 +79,7 @@  discard block
 block discarded – undo
79 79
         $html = '';
80 80
         for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81 81
             if ($this->_scriptFix && $tokens[$i]->name === 'script'
82
-                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
82
+                && $i + 2 < $size && $tokens[$i + 2] instanceof HTMLPurifier_Token_End) {
83 83
                 // script special case
84 84
                 // the contents of the script block must be ONE token
85 85
                 // for this to work.
@@ -132,7 +132,7 @@  discard block
 block discarded – undo
132 132
                     $this->_flashStack[] = $flash;
133 133
                 }
134 134
             }
135
-            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
135
+            return '<'.$token->name.($attr ? ' ' : '').$attr.'>';
136 136
 
137 137
         } elseif ($token instanceof HTMLPurifier_Token_End) {
138 138
             $_extra = '';
@@ -141,22 +141,22 @@  discard block
 block discarded – undo
141 141
                     // doesn't do anything for now
142 142
                 }
143 143
             }
144
-            return $_extra . '</' . $token->name . '>';
144
+            return $_extra.'</'.$token->name.'>';
145 145
 
146 146
         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
147 147
             if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148
-                $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
148
+                $this->_flashStack[count($this->_flashStack) - 1]->param[$token->attr['name']] = $token->attr['value'];
149 149
             }
150 150
             $attr = $this->generateAttributes($token->attr, $token->name);
151
-             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152
-                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
151
+             return '<'.$token->name.($attr ? ' ' : '').$attr.
152
+                ($this->_xhtml ? ' /' : '') // <br /> v. <br>
153 153
                 . '>';
154 154
 
155 155
         } elseif ($token instanceof HTMLPurifier_Token_Text) {
156 156
             return $this->escape($token->data, ENT_NOQUOTES);
157 157
 
158 158
         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
159
-            return '<!--' . $token->data . '-->';
159
+            return '<!--'.$token->data.'-->';
160 160
         } else {
161 161
             return '';
162 162
 
@@ -172,7 +172,7 @@  discard block
 block discarded – undo
172 172
         if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173 173
         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
174 174
         $data = preg_replace('#//\s*$#', '', $token->data);
175
-        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
175
+        return '<!--//--><![CDATA[//><!--'."\n".trim($data)."\n".'//--><!]]>';
176 176
     }
177 177
 
178 178
     /**
@@ -192,7 +192,7 @@  discard block
 block discarded – undo
192 192
                 if (strpos($key, ':') !== false) continue;
193 193
                 // Check if we should minimize the attribute: val="val" -> val
194 194
                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195
-                    $html .= $key . ' ';
195
+                    $html .= $key.' ';
196 196
                     continue;
197 197
                 }
198 198
             }
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php 4 patches
Doc Comments   +1 added lines, -2 removed lines patch added patch discarded remove patch
@@ -385,8 +385,7 @@
 block discarded – undo
385 385
      * separate lists for processing. Format is element[attr1|attr2],element2...
386 386
      * @warning Although it's largely drawn from TinyMCE's implementation,
387 387
      *      it is different, and you'll probably have to modify your lists
388
-     * @param $list String list to parse
389
-     * @param array($allowed_elements, $allowed_attributes)
388
+     * @param string $list String list to parse
390 389
      * @todo Give this its own class, probably static interface
391 390
      */
392 391
     public function parseTinyMCEAllowedList($list) {
Please login to merge, or discard this patch.
Indentation   +392 added lines, -392 removed lines patch added patch discarded remove patch
@@ -26,398 +26,398 @@
 block discarded – undo
26 26
 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
27 27
 {
28 28
 
29
-    // FULLY-PUBLIC VARIABLES ---------------------------------------------
30
-
31
-    /**
32
-     * Associative array of element names to HTMLPurifier_ElementDef
33
-     */
34
-    public $info = array();
35
-
36
-    /**
37
-     * Associative array of global attribute name to attribute definition.
38
-     */
39
-    public $info_global_attr = array();
40
-
41
-    /**
42
-     * String name of parent element HTML will be going into.
43
-     */
44
-    public $info_parent = 'div';
45
-
46
-    /**
47
-     * Definition for parent element, allows parent element to be a
48
-     * tag that's not allowed inside the HTML fragment.
49
-     */
50
-    public $info_parent_def;
51
-
52
-    /**
53
-     * String name of element used to wrap inline elements in block context
54
-     * @note This is rarely used except for BLOCKQUOTEs in strict mode
55
-     */
56
-    public $info_block_wrapper = 'p';
57
-
58
-    /**
59
-     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
60
-     */
61
-    public $info_tag_transform = array();
62
-
63
-    /**
64
-     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
65
-     */
66
-    public $info_attr_transform_pre = array();
67
-
68
-    /**
69
-     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
70
-     */
71
-    public $info_attr_transform_post = array();
72
-
73
-    /**
74
-     * Nested lookup array of content set name (Block, Inline) to
75
-     * element name to whether or not it belongs in that content set.
76
-     */
77
-    public $info_content_sets = array();
78
-
79
-    /**
80
-     * Indexed list of HTMLPurifier_Injector to be used.
81
-     */
82
-    public $info_injector = array();
83
-
84
-    /**
85
-     * Doctype object
86
-     */
87
-    public $doctype;
88
-
89
-
90
-
91
-    // RAW CUSTOMIZATION STUFF --------------------------------------------
92
-
93
-    /**
94
-     * Adds a custom attribute to a pre-existing element
95
-     * @note This is strictly convenience, and does not have a corresponding
96
-     *       method in HTMLPurifier_HTMLModule
97
-     * @param $element_name String element name to add attribute to
98
-     * @param $attr_name String name of attribute
99
-     * @param $def Attribute definition, can be string or object, see
100
-     *             HTMLPurifier_AttrTypes for details
101
-     */
102
-    public function addAttribute($element_name, $attr_name, $def) {
103
-        $module = $this->getAnonymousModule();
104
-        if (!isset($module->info[$element_name])) {
105
-            $element = $module->addBlankElement($element_name);
106
-        } else {
107
-            $element = $module->info[$element_name];
108
-        }
109
-        $element->attr[$attr_name] = $def;
110
-    }
111
-
112
-    /**
113
-     * Adds a custom element to your HTML definition
114
-     * @note See HTMLPurifier_HTMLModule::addElement for detailed
115
-     *       parameter and return value descriptions.
116
-     */
117
-    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
118
-        $module = $this->getAnonymousModule();
119
-        // assume that if the user is calling this, the element
120
-        // is safe. This may not be a good idea
121
-        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
122
-        return $element;
123
-    }
124
-
125
-    /**
126
-     * Adds a blank element to your HTML definition, for overriding
127
-     * existing behavior
128
-     * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
129
-     *       parameter and return value descriptions.
130
-     */
131
-    public function addBlankElement($element_name) {
132
-        $module  = $this->getAnonymousModule();
133
-        $element = $module->addBlankElement($element_name);
134
-        return $element;
135
-    }
136
-
137
-    /**
138
-     * Retrieves a reference to the anonymous module, so you can
139
-     * bust out advanced features without having to make your own
140
-     * module.
141
-     */
142
-    public function getAnonymousModule() {
143
-        if (!$this->_anonModule) {
144
-            $this->_anonModule = new HTMLPurifier_HTMLModule();
145
-            $this->_anonModule->name = 'Anonymous';
146
-        }
147
-        return $this->_anonModule;
148
-    }
149
-
150
-    private $_anonModule = null;
151
-
152
-
153
-    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
154
-
155
-    public $type = 'HTML';
156
-    public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
157
-
158
-    /**
159
-     * Performs low-cost, preliminary initialization.
160
-     */
161
-    public function __construct() {
162
-        $this->manager = new HTMLPurifier_HTMLModuleManager();
163
-    }
164
-
165
-    protected function doSetup($config) {
166
-        $this->processModules($config);
167
-        $this->setupConfigStuff($config);
168
-        unset($this->manager);
169
-
170
-        // cleanup some of the element definitions
171
-        foreach ($this->info as $k => $v) {
172
-            unset($this->info[$k]->content_model);
173
-            unset($this->info[$k]->content_model_type);
174
-        }
175
-    }
176
-
177
-    /**
178
-     * Extract out the information from the manager
179
-     */
180
-    protected function processModules($config) {
181
-
182
-        if ($this->_anonModule) {
183
-            // for user specific changes
184
-            // this is late-loaded so we don't have to deal with PHP4
185
-            // reference wonky-ness
186
-            $this->manager->addModule($this->_anonModule);
187
-            unset($this->_anonModule);
188
-        }
189
-
190
-        $this->manager->setup($config);
191
-        $this->doctype = $this->manager->doctype;
192
-
193
-        foreach ($this->manager->modules as $module) {
194
-            foreach($module->info_tag_transform as $k => $v) {
195
-                if ($v === false) unset($this->info_tag_transform[$k]);
196
-                else $this->info_tag_transform[$k] = $v;
197
-            }
198
-            foreach($module->info_attr_transform_pre as $k => $v) {
199
-                if ($v === false) unset($this->info_attr_transform_pre[$k]);
200
-                else $this->info_attr_transform_pre[$k] = $v;
201
-            }
202
-            foreach($module->info_attr_transform_post as $k => $v) {
203
-                if ($v === false) unset($this->info_attr_transform_post[$k]);
204
-                else $this->info_attr_transform_post[$k] = $v;
205
-            }
206
-            foreach ($module->info_injector as $k => $v) {
207
-                if ($v === false) unset($this->info_injector[$k]);
208
-                else $this->info_injector[$k] = $v;
209
-            }
210
-        }
211
-
212
-        $this->info = $this->manager->getElements();
213
-        $this->info_content_sets = $this->manager->contentSets->lookup;
214
-
215
-    }
216
-
217
-    /**
218
-     * Sets up stuff based on config. We need a better way of doing this.
219
-     */
220
-    protected function setupConfigStuff($config) {
221
-
222
-        $block_wrapper = $config->get('HTML.BlockWrapper');
223
-        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
224
-            $this->info_block_wrapper = $block_wrapper;
225
-        } else {
226
-            trigger_error('Cannot use non-block element as block wrapper',
227
-                E_USER_ERROR);
228
-        }
229
-
230
-        $parent = $config->get('HTML.Parent');
231
-        $def = $this->manager->getElement($parent, true);
232
-        if ($def) {
233
-            $this->info_parent = $parent;
234
-            $this->info_parent_def = $def;
235
-        } else {
236
-            trigger_error('Cannot use unrecognized element as parent',
237
-                E_USER_ERROR);
238
-            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
239
-        }
240
-
241
-        // support template text
242
-        $support = "(for information on implementing this, see the ".
243
-                   "support forums) ";
244
-
245
-        // setup allowed elements -----------------------------------------
246
-
247
-        $allowed_elements = $config->get('HTML.AllowedElements');
248
-        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
249
-
250
-        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
251
-            $allowed = $config->get('HTML.Allowed');
252
-            if (is_string($allowed)) {
253
-                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
254
-            }
255
-        }
256
-
257
-        if (is_array($allowed_elements)) {
258
-            foreach ($this->info as $name => $d) {
259
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
260
-                unset($allowed_elements[$name]);
261
-            }
262
-            // emit errors
263
-            foreach ($allowed_elements as $element => $d) {
264
-                $element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful!
265
-                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
266
-            }
267
-        }
268
-
269
-        // setup allowed attributes ---------------------------------------
270
-
271
-        $allowed_attributes_mutable = $allowed_attributes; // by copy!
272
-        if (is_array($allowed_attributes)) {
273
-
274
-            // This actually doesn't do anything, since we went away from
275
-            // global attributes. It's possible that userland code uses
276
-            // it, but HTMLModuleManager doesn't!
277
-            foreach ($this->info_global_attr as $attr => $x) {
278
-                $keys = array($attr, "*@$attr", "*.$attr");
279
-                $delete = true;
280
-                foreach ($keys as $key) {
281
-                    if ($delete && isset($allowed_attributes[$key])) {
282
-                        $delete = false;
283
-                    }
284
-                    if (isset($allowed_attributes_mutable[$key])) {
285
-                        unset($allowed_attributes_mutable[$key]);
286
-                    }
287
-                }
288
-                if ($delete) unset($this->info_global_attr[$attr]);
289
-            }
290
-
291
-            foreach ($this->info as $tag => $info) {
292
-                foreach ($info->attr as $attr => $x) {
293
-                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
294
-                    $delete = true;
295
-                    foreach ($keys as $key) {
296
-                        if ($delete && isset($allowed_attributes[$key])) {
297
-                            $delete = false;
298
-                        }
299
-                        if (isset($allowed_attributes_mutable[$key])) {
300
-                            unset($allowed_attributes_mutable[$key]);
301
-                        }
302
-                    }
303
-                    if ($delete) {
304
-                        if ($this->info[$tag]->attr[$attr]->required) {
305
-                            trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
306
-                        }
307
-                        unset($this->info[$tag]->attr[$attr]);
308
-                    }
309
-                }
310
-            }
311
-            // emit errors
312
-            foreach ($allowed_attributes_mutable as $elattr => $d) {
313
-                $bits = preg_split('/[.@]/', $elattr, 2);
314
-                $c = count($bits);
315
-                switch ($c) {
316
-                    case 2:
317
-                        if ($bits[0] !== '*') {
318
-                            $element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
319
-                            $attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
320
-                            if (!isset($this->info[$element])) {
321
-                                trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
322
-                            } else {
323
-                                trigger_error("Attribute '$attribute' in element '$element' not supported $support",
324
-                                    E_USER_WARNING);
325
-                            }
326
-                            break;
327
-                        }
328
-                        // otherwise fall through
329
-                    case 1:
330
-                        $attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
331
-                        trigger_error("Global attribute '$attribute' is not ".
332
-                            "supported in any elements $support",
333
-                            E_USER_WARNING);
334
-                        break;
335
-                }
336
-            }
337
-
338
-        }
339
-
340
-        // setup forbidden elements ---------------------------------------
341
-
342
-        $forbidden_elements   = $config->get('HTML.ForbiddenElements');
343
-        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
344
-
345
-        foreach ($this->info as $tag => $info) {
346
-            if (isset($forbidden_elements[$tag])) {
347
-                unset($this->info[$tag]);
348
-                continue;
349
-            }
350
-            foreach ($info->attr as $attr => $x) {
351
-                if (
352
-                    isset($forbidden_attributes["$tag@$attr"]) ||
353
-                    isset($forbidden_attributes["*@$attr"]) ||
354
-                    isset($forbidden_attributes[$attr])
355
-                ) {
356
-                    unset($this->info[$tag]->attr[$attr]);
357
-                    continue;
358
-                } // this segment might get removed eventually
359
-                elseif (isset($forbidden_attributes["$tag.$attr"])) {
360
-                    // $tag.$attr are not user supplied, so no worries!
361
-                    trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
362
-                }
363
-            }
364
-        }
365
-        foreach ($forbidden_attributes as $key => $v) {
366
-            if (strlen($key) < 2) continue;
367
-            if ($key[0] != '*') continue;
368
-            if ($key[1] == '.') {
369
-                trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
370
-            }
371
-        }
372
-
373
-        // setup injectors -----------------------------------------------------
374
-        foreach ($this->info_injector as $i => $injector) {
375
-            if ($injector->checkNeeded($config) !== false) {
376
-                // remove injector that does not have it's required
377
-                // elements/attributes present, and is thus not needed.
378
-                unset($this->info_injector[$i]);
379
-            }
380
-        }
381
-    }
382
-
383
-    /**
384
-     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
385
-     * separate lists for processing. Format is element[attr1|attr2],element2...
386
-     * @warning Although it's largely drawn from TinyMCE's implementation,
387
-     *      it is different, and you'll probably have to modify your lists
388
-     * @param $list String list to parse
389
-     * @param array($allowed_elements, $allowed_attributes)
390
-     * @todo Give this its own class, probably static interface
391
-     */
392
-    public function parseTinyMCEAllowedList($list) {
393
-
394
-        $list = str_replace(array(' ', "\t"), '', $list);
395
-
396
-        $elements = array();
397
-        $attributes = array();
398
-
399
-        $chunks = preg_split('/(,|[\n\r]+)/', $list);
400
-        foreach ($chunks as $chunk) {
401
-            if (empty($chunk)) continue;
402
-            // remove TinyMCE element control characters
403
-            if (!strpos($chunk, '[')) {
404
-                $element = $chunk;
405
-                $attr = false;
406
-            } else {
407
-                list($element, $attr) = explode('[', $chunk);
408
-            }
409
-            if ($element !== '*') $elements[$element] = true;
410
-            if (!$attr) continue;
411
-            $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
412
-            $attr = explode('|', $attr);
413
-            foreach ($attr as $key) {
414
-                $attributes["$element.$key"] = true;
415
-            }
416
-        }
417
-
418
-        return array($elements, $attributes);
419
-
420
-    }
29
+	// FULLY-PUBLIC VARIABLES ---------------------------------------------
30
+
31
+	/**
32
+	 * Associative array of element names to HTMLPurifier_ElementDef
33
+	 */
34
+	public $info = array();
35
+
36
+	/**
37
+	 * Associative array of global attribute name to attribute definition.
38
+	 */
39
+	public $info_global_attr = array();
40
+
41
+	/**
42
+	 * String name of parent element HTML will be going into.
43
+	 */
44
+	public $info_parent = 'div';
45
+
46
+	/**
47
+	 * Definition for parent element, allows parent element to be a
48
+	 * tag that's not allowed inside the HTML fragment.
49
+	 */
50
+	public $info_parent_def;
51
+
52
+	/**
53
+	 * String name of element used to wrap inline elements in block context
54
+	 * @note This is rarely used except for BLOCKQUOTEs in strict mode
55
+	 */
56
+	public $info_block_wrapper = 'p';
57
+
58
+	/**
59
+	 * Associative array of deprecated tag name to HTMLPurifier_TagTransform
60
+	 */
61
+	public $info_tag_transform = array();
62
+
63
+	/**
64
+	 * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
65
+	 */
66
+	public $info_attr_transform_pre = array();
67
+
68
+	/**
69
+	 * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
70
+	 */
71
+	public $info_attr_transform_post = array();
72
+
73
+	/**
74
+	 * Nested lookup array of content set name (Block, Inline) to
75
+	 * element name to whether or not it belongs in that content set.
76
+	 */
77
+	public $info_content_sets = array();
78
+
79
+	/**
80
+	 * Indexed list of HTMLPurifier_Injector to be used.
81
+	 */
82
+	public $info_injector = array();
83
+
84
+	/**
85
+	 * Doctype object
86
+	 */
87
+	public $doctype;
88
+
89
+
90
+
91
+	// RAW CUSTOMIZATION STUFF --------------------------------------------
92
+
93
+	/**
94
+	 * Adds a custom attribute to a pre-existing element
95
+	 * @note This is strictly convenience, and does not have a corresponding
96
+	 *       method in HTMLPurifier_HTMLModule
97
+	 * @param $element_name String element name to add attribute to
98
+	 * @param $attr_name String name of attribute
99
+	 * @param $def Attribute definition, can be string or object, see
100
+	 *             HTMLPurifier_AttrTypes for details
101
+	 */
102
+	public function addAttribute($element_name, $attr_name, $def) {
103
+		$module = $this->getAnonymousModule();
104
+		if (!isset($module->info[$element_name])) {
105
+			$element = $module->addBlankElement($element_name);
106
+		} else {
107
+			$element = $module->info[$element_name];
108
+		}
109
+		$element->attr[$attr_name] = $def;
110
+	}
111
+
112
+	/**
113
+	 * Adds a custom element to your HTML definition
114
+	 * @note See HTMLPurifier_HTMLModule::addElement for detailed
115
+	 *       parameter and return value descriptions.
116
+	 */
117
+	public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
118
+		$module = $this->getAnonymousModule();
119
+		// assume that if the user is calling this, the element
120
+		// is safe. This may not be a good idea
121
+		$element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
122
+		return $element;
123
+	}
124
+
125
+	/**
126
+	 * Adds a blank element to your HTML definition, for overriding
127
+	 * existing behavior
128
+	 * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
129
+	 *       parameter and return value descriptions.
130
+	 */
131
+	public function addBlankElement($element_name) {
132
+		$module  = $this->getAnonymousModule();
133
+		$element = $module->addBlankElement($element_name);
134
+		return $element;
135
+	}
136
+
137
+	/**
138
+	 * Retrieves a reference to the anonymous module, so you can
139
+	 * bust out advanced features without having to make your own
140
+	 * module.
141
+	 */
142
+	public function getAnonymousModule() {
143
+		if (!$this->_anonModule) {
144
+			$this->_anonModule = new HTMLPurifier_HTMLModule();
145
+			$this->_anonModule->name = 'Anonymous';
146
+		}
147
+		return $this->_anonModule;
148
+	}
149
+
150
+	private $_anonModule = null;
151
+
152
+
153
+	// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
154
+
155
+	public $type = 'HTML';
156
+	public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
157
+
158
+	/**
159
+	 * Performs low-cost, preliminary initialization.
160
+	 */
161
+	public function __construct() {
162
+		$this->manager = new HTMLPurifier_HTMLModuleManager();
163
+	}
164
+
165
+	protected function doSetup($config) {
166
+		$this->processModules($config);
167
+		$this->setupConfigStuff($config);
168
+		unset($this->manager);
169
+
170
+		// cleanup some of the element definitions
171
+		foreach ($this->info as $k => $v) {
172
+			unset($this->info[$k]->content_model);
173
+			unset($this->info[$k]->content_model_type);
174
+		}
175
+	}
176
+
177
+	/**
178
+	 * Extract out the information from the manager
179
+	 */
180
+	protected function processModules($config) {
181
+
182
+		if ($this->_anonModule) {
183
+			// for user specific changes
184
+			// this is late-loaded so we don't have to deal with PHP4
185
+			// reference wonky-ness
186
+			$this->manager->addModule($this->_anonModule);
187
+			unset($this->_anonModule);
188
+		}
189
+
190
+		$this->manager->setup($config);
191
+		$this->doctype = $this->manager->doctype;
192
+
193
+		foreach ($this->manager->modules as $module) {
194
+			foreach($module->info_tag_transform as $k => $v) {
195
+				if ($v === false) unset($this->info_tag_transform[$k]);
196
+				else $this->info_tag_transform[$k] = $v;
197
+			}
198
+			foreach($module->info_attr_transform_pre as $k => $v) {
199
+				if ($v === false) unset($this->info_attr_transform_pre[$k]);
200
+				else $this->info_attr_transform_pre[$k] = $v;
201
+			}
202
+			foreach($module->info_attr_transform_post as $k => $v) {
203
+				if ($v === false) unset($this->info_attr_transform_post[$k]);
204
+				else $this->info_attr_transform_post[$k] = $v;
205
+			}
206
+			foreach ($module->info_injector as $k => $v) {
207
+				if ($v === false) unset($this->info_injector[$k]);
208
+				else $this->info_injector[$k] = $v;
209
+			}
210
+		}
211
+
212
+		$this->info = $this->manager->getElements();
213
+		$this->info_content_sets = $this->manager->contentSets->lookup;
214
+
215
+	}
216
+
217
+	/**
218
+	 * Sets up stuff based on config. We need a better way of doing this.
219
+	 */
220
+	protected function setupConfigStuff($config) {
221
+
222
+		$block_wrapper = $config->get('HTML.BlockWrapper');
223
+		if (isset($this->info_content_sets['Block'][$block_wrapper])) {
224
+			$this->info_block_wrapper = $block_wrapper;
225
+		} else {
226
+			trigger_error('Cannot use non-block element as block wrapper',
227
+				E_USER_ERROR);
228
+		}
229
+
230
+		$parent = $config->get('HTML.Parent');
231
+		$def = $this->manager->getElement($parent, true);
232
+		if ($def) {
233
+			$this->info_parent = $parent;
234
+			$this->info_parent_def = $def;
235
+		} else {
236
+			trigger_error('Cannot use unrecognized element as parent',
237
+				E_USER_ERROR);
238
+			$this->info_parent_def = $this->manager->getElement($this->info_parent, true);
239
+		}
240
+
241
+		// support template text
242
+		$support = "(for information on implementing this, see the ".
243
+				   "support forums) ";
244
+
245
+		// setup allowed elements -----------------------------------------
246
+
247
+		$allowed_elements = $config->get('HTML.AllowedElements');
248
+		$allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
249
+
250
+		if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
251
+			$allowed = $config->get('HTML.Allowed');
252
+			if (is_string($allowed)) {
253
+				list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
254
+			}
255
+		}
256
+
257
+		if (is_array($allowed_elements)) {
258
+			foreach ($this->info as $name => $d) {
259
+				if(!isset($allowed_elements[$name])) unset($this->info[$name]);
260
+				unset($allowed_elements[$name]);
261
+			}
262
+			// emit errors
263
+			foreach ($allowed_elements as $element => $d) {
264
+				$element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful!
265
+				trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
266
+			}
267
+		}
268
+
269
+		// setup allowed attributes ---------------------------------------
270
+
271
+		$allowed_attributes_mutable = $allowed_attributes; // by copy!
272
+		if (is_array($allowed_attributes)) {
273
+
274
+			// This actually doesn't do anything, since we went away from
275
+			// global attributes. It's possible that userland code uses
276
+			// it, but HTMLModuleManager doesn't!
277
+			foreach ($this->info_global_attr as $attr => $x) {
278
+				$keys = array($attr, "*@$attr", "*.$attr");
279
+				$delete = true;
280
+				foreach ($keys as $key) {
281
+					if ($delete && isset($allowed_attributes[$key])) {
282
+						$delete = false;
283
+					}
284
+					if (isset($allowed_attributes_mutable[$key])) {
285
+						unset($allowed_attributes_mutable[$key]);
286
+					}
287
+				}
288
+				if ($delete) unset($this->info_global_attr[$attr]);
289
+			}
290
+
291
+			foreach ($this->info as $tag => $info) {
292
+				foreach ($info->attr as $attr => $x) {
293
+					$keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
294
+					$delete = true;
295
+					foreach ($keys as $key) {
296
+						if ($delete && isset($allowed_attributes[$key])) {
297
+							$delete = false;
298
+						}
299
+						if (isset($allowed_attributes_mutable[$key])) {
300
+							unset($allowed_attributes_mutable[$key]);
301
+						}
302
+					}
303
+					if ($delete) {
304
+						if ($this->info[$tag]->attr[$attr]->required) {
305
+							trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
306
+						}
307
+						unset($this->info[$tag]->attr[$attr]);
308
+					}
309
+				}
310
+			}
311
+			// emit errors
312
+			foreach ($allowed_attributes_mutable as $elattr => $d) {
313
+				$bits = preg_split('/[.@]/', $elattr, 2);
314
+				$c = count($bits);
315
+				switch ($c) {
316
+					case 2:
317
+						if ($bits[0] !== '*') {
318
+							$element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
319
+							$attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
320
+							if (!isset($this->info[$element])) {
321
+								trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
322
+							} else {
323
+								trigger_error("Attribute '$attribute' in element '$element' not supported $support",
324
+									E_USER_WARNING);
325
+							}
326
+							break;
327
+						}
328
+						// otherwise fall through
329
+					case 1:
330
+						$attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
331
+						trigger_error("Global attribute '$attribute' is not ".
332
+							"supported in any elements $support",
333
+							E_USER_WARNING);
334
+						break;
335
+				}
336
+			}
337
+
338
+		}
339
+
340
+		// setup forbidden elements ---------------------------------------
341
+
342
+		$forbidden_elements   = $config->get('HTML.ForbiddenElements');
343
+		$forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
344
+
345
+		foreach ($this->info as $tag => $info) {
346
+			if (isset($forbidden_elements[$tag])) {
347
+				unset($this->info[$tag]);
348
+				continue;
349
+			}
350
+			foreach ($info->attr as $attr => $x) {
351
+				if (
352
+					isset($forbidden_attributes["$tag@$attr"]) ||
353
+					isset($forbidden_attributes["*@$attr"]) ||
354
+					isset($forbidden_attributes[$attr])
355
+				) {
356
+					unset($this->info[$tag]->attr[$attr]);
357
+					continue;
358
+				} // this segment might get removed eventually
359
+				elseif (isset($forbidden_attributes["$tag.$attr"])) {
360
+					// $tag.$attr are not user supplied, so no worries!
361
+					trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
362
+				}
363
+			}
364
+		}
365
+		foreach ($forbidden_attributes as $key => $v) {
366
+			if (strlen($key) < 2) continue;
367
+			if ($key[0] != '*') continue;
368
+			if ($key[1] == '.') {
369
+				trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
370
+			}
371
+		}
372
+
373
+		// setup injectors -----------------------------------------------------
374
+		foreach ($this->info_injector as $i => $injector) {
375
+			if ($injector->checkNeeded($config) !== false) {
376
+				// remove injector that does not have it's required
377
+				// elements/attributes present, and is thus not needed.
378
+				unset($this->info_injector[$i]);
379
+			}
380
+		}
381
+	}
382
+
383
+	/**
384
+	 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
385
+	 * separate lists for processing. Format is element[attr1|attr2],element2...
386
+	 * @warning Although it's largely drawn from TinyMCE's implementation,
387
+	 *      it is different, and you'll probably have to modify your lists
388
+	 * @param $list String list to parse
389
+	 * @param array($allowed_elements, $allowed_attributes)
390
+	 * @todo Give this its own class, probably static interface
391
+	 */
392
+	public function parseTinyMCEAllowedList($list) {
393
+
394
+		$list = str_replace(array(' ', "\t"), '', $list);
395
+
396
+		$elements = array();
397
+		$attributes = array();
398
+
399
+		$chunks = preg_split('/(,|[\n\r]+)/', $list);
400
+		foreach ($chunks as $chunk) {
401
+			if (empty($chunk)) continue;
402
+			// remove TinyMCE element control characters
403
+			if (!strpos($chunk, '[')) {
404
+				$element = $chunk;
405
+				$attr = false;
406
+			} else {
407
+				list($element, $attr) = explode('[', $chunk);
408
+			}
409
+			if ($element !== '*') $elements[$element] = true;
410
+			if (!$attr) continue;
411
+			$attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
412
+			$attr = explode('|', $attr);
413
+			foreach ($attr as $key) {
414
+				$attributes["$element.$key"] = true;
415
+			}
416
+		}
417
+
418
+		return array($elements, $attributes);
419
+
420
+	}
421 421
 
422 422
 
423 423
 }
Please login to merge, or discard this patch.
Braces   +41 added lines, -15 removed lines patch added patch discarded remove patch
@@ -192,20 +192,32 @@  discard block
 block discarded – undo
192 192
 
193 193
         foreach ($this->manager->modules as $module) {
194 194
             foreach($module->info_tag_transform as $k => $v) {
195
-                if ($v === false) unset($this->info_tag_transform[$k]);
196
-                else $this->info_tag_transform[$k] = $v;
195
+                if ($v === false) {
196
+                	unset($this->info_tag_transform[$k]);
197
+                } else {
198
+                	$this->info_tag_transform[$k] = $v;
199
+                }
197 200
             }
198 201
             foreach($module->info_attr_transform_pre as $k => $v) {
199
-                if ($v === false) unset($this->info_attr_transform_pre[$k]);
200
-                else $this->info_attr_transform_pre[$k] = $v;
202
+                if ($v === false) {
203
+                	unset($this->info_attr_transform_pre[$k]);
204
+                } else {
205
+                	$this->info_attr_transform_pre[$k] = $v;
206
+                }
201 207
             }
202 208
             foreach($module->info_attr_transform_post as $k => $v) {
203
-                if ($v === false) unset($this->info_attr_transform_post[$k]);
204
-                else $this->info_attr_transform_post[$k] = $v;
209
+                if ($v === false) {
210
+                	unset($this->info_attr_transform_post[$k]);
211
+                } else {
212
+                	$this->info_attr_transform_post[$k] = $v;
213
+                }
205 214
             }
206 215
             foreach ($module->info_injector as $k => $v) {
207
-                if ($v === false) unset($this->info_injector[$k]);
208
-                else $this->info_injector[$k] = $v;
216
+                if ($v === false) {
217
+                	unset($this->info_injector[$k]);
218
+                } else {
219
+                	$this->info_injector[$k] = $v;
220
+                }
209 221
             }
210 222
         }
211 223
 
@@ -256,7 +268,9 @@  discard block
 block discarded – undo
256 268
 
257 269
         if (is_array($allowed_elements)) {
258 270
             foreach ($this->info as $name => $d) {
259
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
271
+                if(!isset($allowed_elements[$name])) {
272
+                	unset($this->info[$name]);
273
+                }
260 274
                 unset($allowed_elements[$name]);
261 275
             }
262 276
             // emit errors
@@ -285,7 +299,9 @@  discard block
 block discarded – undo
285 299
                         unset($allowed_attributes_mutable[$key]);
286 300
                     }
287 301
                 }
288
-                if ($delete) unset($this->info_global_attr[$attr]);
302
+                if ($delete) {
303
+                	unset($this->info_global_attr[$attr]);
304
+                }
289 305
             }
290 306
 
291 307
             foreach ($this->info as $tag => $info) {
@@ -363,8 +379,12 @@  discard block
 block discarded – undo
363 379
             }
364 380
         }
365 381
         foreach ($forbidden_attributes as $key => $v) {
366
-            if (strlen($key) < 2) continue;
367
-            if ($key[0] != '*') continue;
382
+            if (strlen($key) < 2) {
383
+            	continue;
384
+            }
385
+            if ($key[0] != '*') {
386
+            	continue;
387
+            }
368 388
             if ($key[1] == '.') {
369 389
                 trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
370 390
             }
@@ -398,7 +418,9 @@  discard block
 block discarded – undo
398 418
 
399 419
         $chunks = preg_split('/(,|[\n\r]+)/', $list);
400 420
         foreach ($chunks as $chunk) {
401
-            if (empty($chunk)) continue;
421
+            if (empty($chunk)) {
422
+            	continue;
423
+            }
402 424
             // remove TinyMCE element control characters
403 425
             if (!strpos($chunk, '[')) {
404 426
                 $element = $chunk;
@@ -406,8 +428,12 @@  discard block
 block discarded – undo
406 428
             } else {
407 429
                 list($element, $attr) = explode('[', $chunk);
408 430
             }
409
-            if ($element !== '*') $elements[$element] = true;
410
-            if (!$attr) continue;
431
+            if ($element !== '*') {
432
+            	$elements[$element] = true;
433
+            }
434
+            if (!$attr) {
435
+            	continue;
436
+            }
411 437
             $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
412 438
             $attr = explode('|', $attr);
413 439
             foreach ($attr as $key) {
Please login to merge, or discard this patch.
Spacing   +4 added lines, -4 removed lines patch added patch discarded remove patch
@@ -191,15 +191,15 @@  discard block
 block discarded – undo
191 191
         $this->doctype = $this->manager->doctype;
192 192
 
193 193
         foreach ($this->manager->modules as $module) {
194
-            foreach($module->info_tag_transform as $k => $v) {
194
+            foreach ($module->info_tag_transform as $k => $v) {
195 195
                 if ($v === false) unset($this->info_tag_transform[$k]);
196 196
                 else $this->info_tag_transform[$k] = $v;
197 197
             }
198
-            foreach($module->info_attr_transform_pre as $k => $v) {
198
+            foreach ($module->info_attr_transform_pre as $k => $v) {
199 199
                 if ($v === false) unset($this->info_attr_transform_pre[$k]);
200 200
                 else $this->info_attr_transform_pre[$k] = $v;
201 201
             }
202
-            foreach($module->info_attr_transform_post as $k => $v) {
202
+            foreach ($module->info_attr_transform_post as $k => $v) {
203 203
                 if ($v === false) unset($this->info_attr_transform_post[$k]);
204 204
                 else $this->info_attr_transform_post[$k] = $v;
205 205
             }
@@ -256,7 +256,7 @@  discard block
 block discarded – undo
256 256
 
257 257
         if (is_array($allowed_elements)) {
258 258
             foreach ($this->info as $name => $d) {
259
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
259
+                if (!isset($allowed_elements[$name])) unset($this->info[$name]);
260 260
                 unset($allowed_elements[$name]);
261 261
             }
262 262
             // emit errors
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php 4 patches
Doc Comments   +2 added lines, -1 removed lines patch added patch discarded remove patch
@@ -179,6 +179,7 @@  discard block
 block discarded – undo
179 179
     /**
180 180
      * Adds a module to the current doctype by first registering it,
181 181
      * and then tacking it on to the active doctype
182
+     * @param HTMLPurifier_HTMLModule $module
182 183
      */
183 184
     public function addModule($module) {
184 185
         $this->registerModule($module);
@@ -325,7 +326,7 @@  discard block
 block discarded – undo
325 326
     /**
326 327
      * Retrieves a single merged element definition
327 328
      * @param $name Name of element
328
-     * @param $trusted Boolean trusted overriding parameter: set to true
329
+     * @param boolean $trusted Boolean trusted overriding parameter: set to true
329 330
      *                 if you want the full version of an element
330 331
      * @return Merged HTMLPurifier_ElementDef
331 332
      * @note You may notice that modules are getting iterated over twice (once
Please login to merge, or discard this patch.
Indentation   +406 added lines, -406 removed lines patch added patch discarded remove patch
@@ -3,412 +3,412 @@
 block discarded – undo
3 3
 class HTMLPurifier_HTMLModuleManager
4 4
 {
5 5
 
6
-    /**
7
-     * Instance of HTMLPurifier_DoctypeRegistry
8
-     */
9
-    public $doctypes;
10
-
11
-    /**
12
-     * Instance of current doctype
13
-     */
14
-    public $doctype;
15
-
16
-    /**
17
-     * Instance of HTMLPurifier_AttrTypes
18
-     */
19
-    public $attrTypes;
20
-
21
-    /**
22
-     * Active instances of modules for the specified doctype are
23
-     * indexed, by name, in this array.
24
-     */
25
-    public $modules = array();
26
-
27
-    /**
28
-     * Array of recognized HTMLPurifier_Module instances, indexed by
29
-     * module's class name. This array is usually lazy loaded, but a
30
-     * user can overload a module by pre-emptively registering it.
31
-     */
32
-    public $registeredModules = array();
33
-
34
-    /**
35
-     * List of extra modules that were added by the user using addModule().
36
-     * These get unconditionally merged into the current doctype, whatever
37
-     * it may be.
38
-     */
39
-    public $userModules = array();
40
-
41
-    /**
42
-     * Associative array of element name to list of modules that have
43
-     * definitions for the element; this array is dynamically filled.
44
-     */
45
-    public $elementLookup = array();
46
-
47
-    /** List of prefixes we should use for registering small names */
48
-    public $prefixes = array('HTMLPurifier_HTMLModule_');
49
-
50
-    public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
51
-    public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
52
-
53
-    /** If set to true, unsafe elements and attributes will be allowed */
54
-    public $trusted = false;
55
-
56
-    public function __construct() {
57
-
58
-        // editable internal objects
59
-        $this->attrTypes = new HTMLPurifier_AttrTypes();
60
-        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();
61
-
62
-        // setup basic modules
63
-        $common = array(
64
-            'CommonAttributes', 'Text', 'Hypertext', 'List',
65
-            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
66
-            'StyleAttribute',
67
-            // Unsafe:
68
-            'Scripting', 'Object', 'Forms',
69
-            // Sorta legacy, but present in strict:
70
-            'Name',
71
-        );
72
-        $transitional = array('Legacy', 'Target', 'Iframe');
73
-        $xml = array('XMLCommonAttributes');
74
-        $non_xml = array('NonXMLCommonAttributes');
75
-
76
-        // setup basic doctypes
77
-        $this->doctypes->register(
78
-            'HTML 4.01 Transitional', false,
79
-            array_merge($common, $transitional, $non_xml),
80
-            array('Tidy_Transitional', 'Tidy_Proprietary'),
81
-            array(),
82
-            '-//W3C//DTD HTML 4.01 Transitional//EN',
83
-            'http://www.w3.org/TR/html4/loose.dtd'
84
-        );
85
-
86
-        $this->doctypes->register(
87
-            'HTML 4.01 Strict', false,
88
-            array_merge($common, $non_xml),
89
-            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
90
-            array(),
91
-            '-//W3C//DTD HTML 4.01//EN',
92
-            'http://www.w3.org/TR/html4/strict.dtd'
93
-        );
94
-
95
-        $this->doctypes->register(
96
-            'XHTML 1.0 Transitional', true,
97
-            array_merge($common, $transitional, $xml, $non_xml),
98
-            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
99
-            array(),
100
-            '-//W3C//DTD XHTML 1.0 Transitional//EN',
101
-            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
102
-        );
103
-
104
-        $this->doctypes->register(
105
-            'XHTML 1.0 Strict', true,
106
-            array_merge($common, $xml, $non_xml),
107
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
108
-            array(),
109
-            '-//W3C//DTD XHTML 1.0 Strict//EN',
110
-            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
111
-        );
112
-
113
-        $this->doctypes->register(
114
-            'XHTML 1.1', true,
115
-            // Iframe is a real XHTML 1.1 module, despite being
116
-            // "transitional"!
117
-            array_merge($common, $xml, array('Ruby', 'Iframe')),
118
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
119
-            array(),
120
-            '-//W3C//DTD XHTML 1.1//EN',
121
-            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
122
-        );
123
-
124
-    }
125
-
126
-    /**
127
-     * Registers a module to the recognized module list, useful for
128
-     * overloading pre-existing modules.
129
-     * @param $module Mixed: string module name, with or without
130
-     *                HTMLPurifier_HTMLModule prefix, or instance of
131
-     *                subclass of HTMLPurifier_HTMLModule.
132
-     * @param $overload Boolean whether or not to overload previous modules.
133
-     *                  If this is not set, and you do overload a module,
134
-     *                  HTML Purifier will complain with a warning.
135
-     * @note This function will not call autoload, you must instantiate
136
-     *       (and thus invoke) autoload outside the method.
137
-     * @note If a string is passed as a module name, different variants
138
-     *       will be tested in this order:
139
-     *          - Check for HTMLPurifier_HTMLModule_$name
140
-     *          - Check all prefixes with $name in order they were added
141
-     *          - Check for literal object name
142
-     *          - Throw fatal error
143
-     *       If your object name collides with an internal class, specify
144
-     *       your module manually. All modules must have been included
145
-     *       externally: registerModule will not perform inclusions for you!
146
-     */
147
-    public function registerModule($module, $overload = false) {
148
-        if (is_string($module)) {
149
-            // attempt to load the module
150
-            $original_module = $module;
151
-            $ok = false;
152
-            foreach ($this->prefixes as $prefix) {
153
-                $module = $prefix . $original_module;
154
-                if (class_exists($module)) {
155
-                    $ok = true;
156
-                    break;
157
-                }
158
-            }
159
-            if (!$ok) {
160
-                $module = $original_module;
161
-                if (!class_exists($module)) {
162
-                    trigger_error($original_module . ' module does not exist',
163
-                        E_USER_ERROR);
164
-                    return;
165
-                }
166
-            }
167
-            $module = new $module();
168
-        }
169
-        if (empty($module->name)) {
170
-            trigger_error('Module instance of ' . get_class($module) . ' must have name');
171
-            return;
172
-        }
173
-        if (!$overload && isset($this->registeredModules[$module->name])) {
174
-            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
175
-        }
176
-        $this->registeredModules[$module->name] = $module;
177
-    }
178
-
179
-    /**
180
-     * Adds a module to the current doctype by first registering it,
181
-     * and then tacking it on to the active doctype
182
-     */
183
-    public function addModule($module) {
184
-        $this->registerModule($module);
185
-        if (is_object($module)) $module = $module->name;
186
-        $this->userModules[] = $module;
187
-    }
188
-
189
-    /**
190
-     * Adds a class prefix that registerModule() will use to resolve a
191
-     * string name to a concrete class
192
-     */
193
-    public function addPrefix($prefix) {
194
-        $this->prefixes[] = $prefix;
195
-    }
196
-
197
-    /**
198
-     * Performs processing on modules, after being called you may
199
-     * use getElement() and getElements()
200
-     * @param $config Instance of HTMLPurifier_Config
201
-     */
202
-    public function setup($config) {
203
-
204
-        $this->trusted = $config->get('HTML.Trusted');
205
-
206
-        // generate
207
-        $this->doctype = $this->doctypes->make($config);
208
-        $modules = $this->doctype->modules;
209
-
210
-        // take out the default modules that aren't allowed
211
-        $lookup = $config->get('HTML.AllowedModules');
212
-        $special_cases = $config->get('HTML.CoreModules');
213
-
214
-        if (is_array($lookup)) {
215
-            foreach ($modules as $k => $m) {
216
-                if (isset($special_cases[$m])) continue;
217
-                if (!isset($lookup[$m])) unset($modules[$k]);
218
-            }
219
-        }
220
-
221
-        // custom modules
222
-        if ($config->get('HTML.Proprietary')) {
223
-            $modules[] = 'Proprietary';
224
-        }
225
-        if ($config->get('HTML.SafeObject')) {
226
-            $modules[] = 'SafeObject';
227
-        }
228
-        if ($config->get('HTML.SafeEmbed')) {
229
-            $modules[] = 'SafeEmbed';
230
-        }
231
-        if ($config->get('HTML.Nofollow')) {
232
-            $modules[] = 'Nofollow';
233
-        }
234
-        if ($config->get('HTML.TargetBlank')) {
235
-            $modules[] = 'TargetBlank';
236
-        }
237
-
238
-        // merge in custom modules
239
-        $modules = array_merge($modules, $this->userModules);
240
-
241
-        foreach ($modules as $module) {
242
-            $this->processModule($module);
243
-            $this->modules[$module]->setup($config);
244
-        }
245
-
246
-        foreach ($this->doctype->tidyModules as $module) {
247
-            $this->processModule($module);
248
-            $this->modules[$module]->setup($config);
249
-        }
250
-
251
-        // prepare any injectors
252
-        foreach ($this->modules as $module) {
253
-            $n = array();
254
-            foreach ($module->info_injector as $i => $injector) {
255
-                if (!is_object($injector)) {
256
-                    $class = "HTMLPurifier_Injector_$injector";
257
-                    $injector = new $class;
258
-                }
259
-                $n[$injector->name] = $injector;
260
-            }
261
-            $module->info_injector = $n;
262
-        }
263
-
264
-        // setup lookup table based on all valid modules
265
-        foreach ($this->modules as $module) {
266
-            foreach ($module->info as $name => $def) {
267
-                if (!isset($this->elementLookup[$name])) {
268
-                    $this->elementLookup[$name] = array();
269
-                }
270
-                $this->elementLookup[$name][] = $module->name;
271
-            }
272
-        }
273
-
274
-        // note the different choice
275
-        $this->contentSets = new HTMLPurifier_ContentSets(
276
-            // content set assembly deals with all possible modules,
277
-            // not just ones deemed to be "safe"
278
-            $this->modules
279
-        );
280
-        $this->attrCollections = new HTMLPurifier_AttrCollections(
281
-            $this->attrTypes,
282
-            // there is no way to directly disable a global attribute,
283
-            // but using AllowedAttributes or simply not including
284
-            // the module in your custom doctype should be sufficient
285
-            $this->modules
286
-        );
287
-    }
288
-
289
-    /**
290
-     * Takes a module and adds it to the active module collection,
291
-     * registering it if necessary.
292
-     */
293
-    public function processModule($module) {
294
-        if (!isset($this->registeredModules[$module]) || is_object($module)) {
295
-            $this->registerModule($module);
296
-        }
297
-        $this->modules[$module] = $this->registeredModules[$module];
298
-    }
299
-
300
-    /**
301
-     * Retrieves merged element definitions.
302
-     * @return Array of HTMLPurifier_ElementDef
303
-     */
304
-    public function getElements() {
305
-
306
-        $elements = array();
307
-        foreach ($this->modules as $module) {
308
-            if (!$this->trusted && !$module->safe) continue;
309
-            foreach ($module->info as $name => $v) {
310
-                if (isset($elements[$name])) continue;
311
-                $elements[$name] = $this->getElement($name);
312
-            }
313
-        }
314
-
315
-        // remove dud elements, this happens when an element that
316
-        // appeared to be safe actually wasn't
317
-        foreach ($elements as $n => $v) {
318
-            if ($v === false) unset($elements[$n]);
319
-        }
320
-
321
-        return $elements;
322
-
323
-    }
324
-
325
-    /**
326
-     * Retrieves a single merged element definition
327
-     * @param $name Name of element
328
-     * @param $trusted Boolean trusted overriding parameter: set to true
329
-     *                 if you want the full version of an element
330
-     * @return Merged HTMLPurifier_ElementDef
331
-     * @note You may notice that modules are getting iterated over twice (once
332
-     *       in getElements() and once here). This
333
-     *       is because
334
-     */
335
-    public function getElement($name, $trusted = null) {
336
-
337
-        if (!isset($this->elementLookup[$name])) {
338
-            return false;
339
-        }
340
-
341
-        // setup global state variables
342
-        $def = false;
343
-        if ($trusted === null) $trusted = $this->trusted;
344
-
345
-        // iterate through each module that has registered itself to this
346
-        // element
347
-        foreach($this->elementLookup[$name] as $module_name) {
348
-
349
-            $module = $this->modules[$module_name];
350
-
351
-            // refuse to create/merge from a module that is deemed unsafe--
352
-            // pretend the module doesn't exist--when trusted mode is not on.
353
-            if (!$trusted && !$module->safe) {
354
-                continue;
355
-            }
356
-
357
-            // clone is used because, ideally speaking, the original
358
-            // definition should not be modified. Usually, this will
359
-            // make no difference, but for consistency's sake
360
-            $new_def = clone $module->info[$name];
361
-
362
-            if (!$def && $new_def->standalone) {
363
-                $def = $new_def;
364
-            } elseif ($def) {
365
-                // This will occur even if $new_def is standalone. In practice,
366
-                // this will usually result in a full replacement.
367
-                $def->mergeIn($new_def);
368
-            } else {
369
-                // :TODO:
370
-                // non-standalone definitions that don't have a standalone
371
-                // to merge into could be deferred to the end
372
-                // HOWEVER, it is perfectly valid for a non-standalone
373
-                // definition to lack a standalone definition, even
374
-                // after all processing: this allows us to safely
375
-                // specify extra attributes for elements that may not be
376
-                // enabled all in one place.  In particular, this might
377
-                // be the case for trusted elements.  WARNING: care must
378
-                // be taken that the /extra/ definitions are all safe.
379
-                continue;
380
-            }
381
-
382
-            // attribute value expansions
383
-            $this->attrCollections->performInclusions($def->attr);
384
-            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
385
-
386
-            // descendants_are_inline, for ChildDef_Chameleon
387
-            if (is_string($def->content_model) &&
388
-                strpos($def->content_model, 'Inline') !== false) {
389
-                if ($name != 'del' && $name != 'ins') {
390
-                    // this is for you, ins/del
391
-                    $def->descendants_are_inline = true;
392
-                }
393
-            }
394
-
395
-            $this->contentSets->generateChildDef($def, $module);
396
-        }
397
-
398
-        // This can occur if there is a blank definition, but no base to
399
-        // mix it in with
400
-        if (!$def) return false;
401
-
402
-        // add information on required attributes
403
-        foreach ($def->attr as $attr_name => $attr_def) {
404
-            if ($attr_def->required) {
405
-                $def->required_attr[] = $attr_name;
406
-            }
407
-        }
408
-
409
-        return $def;
410
-
411
-    }
6
+	/**
7
+	 * Instance of HTMLPurifier_DoctypeRegistry
8
+	 */
9
+	public $doctypes;
10
+
11
+	/**
12
+	 * Instance of current doctype
13
+	 */
14
+	public $doctype;
15
+
16
+	/**
17
+	 * Instance of HTMLPurifier_AttrTypes
18
+	 */
19
+	public $attrTypes;
20
+
21
+	/**
22
+	 * Active instances of modules for the specified doctype are
23
+	 * indexed, by name, in this array.
24
+	 */
25
+	public $modules = array();
26
+
27
+	/**
28
+	 * Array of recognized HTMLPurifier_Module instances, indexed by
29
+	 * module's class name. This array is usually lazy loaded, but a
30
+	 * user can overload a module by pre-emptively registering it.
31
+	 */
32
+	public $registeredModules = array();
33
+
34
+	/**
35
+	 * List of extra modules that were added by the user using addModule().
36
+	 * These get unconditionally merged into the current doctype, whatever
37
+	 * it may be.
38
+	 */
39
+	public $userModules = array();
40
+
41
+	/**
42
+	 * Associative array of element name to list of modules that have
43
+	 * definitions for the element; this array is dynamically filled.
44
+	 */
45
+	public $elementLookup = array();
46
+
47
+	/** List of prefixes we should use for registering small names */
48
+	public $prefixes = array('HTMLPurifier_HTMLModule_');
49
+
50
+	public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
51
+	public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
52
+
53
+	/** If set to true, unsafe elements and attributes will be allowed */
54
+	public $trusted = false;
55
+
56
+	public function __construct() {
57
+
58
+		// editable internal objects
59
+		$this->attrTypes = new HTMLPurifier_AttrTypes();
60
+		$this->doctypes  = new HTMLPurifier_DoctypeRegistry();
61
+
62
+		// setup basic modules
63
+		$common = array(
64
+			'CommonAttributes', 'Text', 'Hypertext', 'List',
65
+			'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
66
+			'StyleAttribute',
67
+			// Unsafe:
68
+			'Scripting', 'Object', 'Forms',
69
+			// Sorta legacy, but present in strict:
70
+			'Name',
71
+		);
72
+		$transitional = array('Legacy', 'Target', 'Iframe');
73
+		$xml = array('XMLCommonAttributes');
74
+		$non_xml = array('NonXMLCommonAttributes');
75
+
76
+		// setup basic doctypes
77
+		$this->doctypes->register(
78
+			'HTML 4.01 Transitional', false,
79
+			array_merge($common, $transitional, $non_xml),
80
+			array('Tidy_Transitional', 'Tidy_Proprietary'),
81
+			array(),
82
+			'-//W3C//DTD HTML 4.01 Transitional//EN',
83
+			'http://www.w3.org/TR/html4/loose.dtd'
84
+		);
85
+
86
+		$this->doctypes->register(
87
+			'HTML 4.01 Strict', false,
88
+			array_merge($common, $non_xml),
89
+			array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
90
+			array(),
91
+			'-//W3C//DTD HTML 4.01//EN',
92
+			'http://www.w3.org/TR/html4/strict.dtd'
93
+		);
94
+
95
+		$this->doctypes->register(
96
+			'XHTML 1.0 Transitional', true,
97
+			array_merge($common, $transitional, $xml, $non_xml),
98
+			array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
99
+			array(),
100
+			'-//W3C//DTD XHTML 1.0 Transitional//EN',
101
+			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
102
+		);
103
+
104
+		$this->doctypes->register(
105
+			'XHTML 1.0 Strict', true,
106
+			array_merge($common, $xml, $non_xml),
107
+			array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
108
+			array(),
109
+			'-//W3C//DTD XHTML 1.0 Strict//EN',
110
+			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
111
+		);
112
+
113
+		$this->doctypes->register(
114
+			'XHTML 1.1', true,
115
+			// Iframe is a real XHTML 1.1 module, despite being
116
+			// "transitional"!
117
+			array_merge($common, $xml, array('Ruby', 'Iframe')),
118
+			array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
119
+			array(),
120
+			'-//W3C//DTD XHTML 1.1//EN',
121
+			'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
122
+		);
123
+
124
+	}
125
+
126
+	/**
127
+	 * Registers a module to the recognized module list, useful for
128
+	 * overloading pre-existing modules.
129
+	 * @param $module Mixed: string module name, with or without
130
+	 *                HTMLPurifier_HTMLModule prefix, or instance of
131
+	 *                subclass of HTMLPurifier_HTMLModule.
132
+	 * @param $overload Boolean whether or not to overload previous modules.
133
+	 *                  If this is not set, and you do overload a module,
134
+	 *                  HTML Purifier will complain with a warning.
135
+	 * @note This function will not call autoload, you must instantiate
136
+	 *       (and thus invoke) autoload outside the method.
137
+	 * @note If a string is passed as a module name, different variants
138
+	 *       will be tested in this order:
139
+	 *          - Check for HTMLPurifier_HTMLModule_$name
140
+	 *          - Check all prefixes with $name in order they were added
141
+	 *          - Check for literal object name
142
+	 *          - Throw fatal error
143
+	 *       If your object name collides with an internal class, specify
144
+	 *       your module manually. All modules must have been included
145
+	 *       externally: registerModule will not perform inclusions for you!
146
+	 */
147
+	public function registerModule($module, $overload = false) {
148
+		if (is_string($module)) {
149
+			// attempt to load the module
150
+			$original_module = $module;
151
+			$ok = false;
152
+			foreach ($this->prefixes as $prefix) {
153
+				$module = $prefix . $original_module;
154
+				if (class_exists($module)) {
155
+					$ok = true;
156
+					break;
157
+				}
158
+			}
159
+			if (!$ok) {
160
+				$module = $original_module;
161
+				if (!class_exists($module)) {
162
+					trigger_error($original_module . ' module does not exist',
163
+						E_USER_ERROR);
164
+					return;
165
+				}
166
+			}
167
+			$module = new $module();
168
+		}
169
+		if (empty($module->name)) {
170
+			trigger_error('Module instance of ' . get_class($module) . ' must have name');
171
+			return;
172
+		}
173
+		if (!$overload && isset($this->registeredModules[$module->name])) {
174
+			trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
175
+		}
176
+		$this->registeredModules[$module->name] = $module;
177
+	}
178
+
179
+	/**
180
+	 * Adds a module to the current doctype by first registering it,
181
+	 * and then tacking it on to the active doctype
182
+	 */
183
+	public function addModule($module) {
184
+		$this->registerModule($module);
185
+		if (is_object($module)) $module = $module->name;
186
+		$this->userModules[] = $module;
187
+	}
188
+
189
+	/**
190
+	 * Adds a class prefix that registerModule() will use to resolve a
191
+	 * string name to a concrete class
192
+	 */
193
+	public function addPrefix($prefix) {
194
+		$this->prefixes[] = $prefix;
195
+	}
196
+
197
+	/**
198
+	 * Performs processing on modules, after being called you may
199
+	 * use getElement() and getElements()
200
+	 * @param $config Instance of HTMLPurifier_Config
201
+	 */
202
+	public function setup($config) {
203
+
204
+		$this->trusted = $config->get('HTML.Trusted');
205
+
206
+		// generate
207
+		$this->doctype = $this->doctypes->make($config);
208
+		$modules = $this->doctype->modules;
209
+
210
+		// take out the default modules that aren't allowed
211
+		$lookup = $config->get('HTML.AllowedModules');
212
+		$special_cases = $config->get('HTML.CoreModules');
213
+
214
+		if (is_array($lookup)) {
215
+			foreach ($modules as $k => $m) {
216
+				if (isset($special_cases[$m])) continue;
217
+				if (!isset($lookup[$m])) unset($modules[$k]);
218
+			}
219
+		}
220
+
221
+		// custom modules
222
+		if ($config->get('HTML.Proprietary')) {
223
+			$modules[] = 'Proprietary';
224
+		}
225
+		if ($config->get('HTML.SafeObject')) {
226
+			$modules[] = 'SafeObject';
227
+		}
228
+		if ($config->get('HTML.SafeEmbed')) {
229
+			$modules[] = 'SafeEmbed';
230
+		}
231
+		if ($config->get('HTML.Nofollow')) {
232
+			$modules[] = 'Nofollow';
233
+		}
234
+		if ($config->get('HTML.TargetBlank')) {
235
+			$modules[] = 'TargetBlank';
236
+		}
237
+
238
+		// merge in custom modules
239
+		$modules = array_merge($modules, $this->userModules);
240
+
241
+		foreach ($modules as $module) {
242
+			$this->processModule($module);
243
+			$this->modules[$module]->setup($config);
244
+		}
245
+
246
+		foreach ($this->doctype->tidyModules as $module) {
247
+			$this->processModule($module);
248
+			$this->modules[$module]->setup($config);
249
+		}
250
+
251
+		// prepare any injectors
252
+		foreach ($this->modules as $module) {
253
+			$n = array();
254
+			foreach ($module->info_injector as $i => $injector) {
255
+				if (!is_object($injector)) {
256
+					$class = "HTMLPurifier_Injector_$injector";
257
+					$injector = new $class;
258
+				}
259
+				$n[$injector->name] = $injector;
260
+			}
261
+			$module->info_injector = $n;
262
+		}
263
+
264
+		// setup lookup table based on all valid modules
265
+		foreach ($this->modules as $module) {
266
+			foreach ($module->info as $name => $def) {
267
+				if (!isset($this->elementLookup[$name])) {
268
+					$this->elementLookup[$name] = array();
269
+				}
270
+				$this->elementLookup[$name][] = $module->name;
271
+			}
272
+		}
273
+
274
+		// note the different choice
275
+		$this->contentSets = new HTMLPurifier_ContentSets(
276
+			// content set assembly deals with all possible modules,
277
+			// not just ones deemed to be "safe"
278
+			$this->modules
279
+		);
280
+		$this->attrCollections = new HTMLPurifier_AttrCollections(
281
+			$this->attrTypes,
282
+			// there is no way to directly disable a global attribute,
283
+			// but using AllowedAttributes or simply not including
284
+			// the module in your custom doctype should be sufficient
285
+			$this->modules
286
+		);
287
+	}
288
+
289
+	/**
290
+	 * Takes a module and adds it to the active module collection,
291
+	 * registering it if necessary.
292
+	 */
293
+	public function processModule($module) {
294
+		if (!isset($this->registeredModules[$module]) || is_object($module)) {
295
+			$this->registerModule($module);
296
+		}
297
+		$this->modules[$module] = $this->registeredModules[$module];
298
+	}
299
+
300
+	/**
301
+	 * Retrieves merged element definitions.
302
+	 * @return Array of HTMLPurifier_ElementDef
303
+	 */
304
+	public function getElements() {
305
+
306
+		$elements = array();
307
+		foreach ($this->modules as $module) {
308
+			if (!$this->trusted && !$module->safe) continue;
309
+			foreach ($module->info as $name => $v) {
310
+				if (isset($elements[$name])) continue;
311
+				$elements[$name] = $this->getElement($name);
312
+			}
313
+		}
314
+
315
+		// remove dud elements, this happens when an element that
316
+		// appeared to be safe actually wasn't
317
+		foreach ($elements as $n => $v) {
318
+			if ($v === false) unset($elements[$n]);
319
+		}
320
+
321
+		return $elements;
322
+
323
+	}
324
+
325
+	/**
326
+	 * Retrieves a single merged element definition
327
+	 * @param $name Name of element
328
+	 * @param $trusted Boolean trusted overriding parameter: set to true
329
+	 *                 if you want the full version of an element
330
+	 * @return Merged HTMLPurifier_ElementDef
331
+	 * @note You may notice that modules are getting iterated over twice (once
332
+	 *       in getElements() and once here). This
333
+	 *       is because
334
+	 */
335
+	public function getElement($name, $trusted = null) {
336
+
337
+		if (!isset($this->elementLookup[$name])) {
338
+			return false;
339
+		}
340
+
341
+		// setup global state variables
342
+		$def = false;
343
+		if ($trusted === null) $trusted = $this->trusted;
344
+
345
+		// iterate through each module that has registered itself to this
346
+		// element
347
+		foreach($this->elementLookup[$name] as $module_name) {
348
+
349
+			$module = $this->modules[$module_name];
350
+
351
+			// refuse to create/merge from a module that is deemed unsafe--
352
+			// pretend the module doesn't exist--when trusted mode is not on.
353
+			if (!$trusted && !$module->safe) {
354
+				continue;
355
+			}
356
+
357
+			// clone is used because, ideally speaking, the original
358
+			// definition should not be modified. Usually, this will
359
+			// make no difference, but for consistency's sake
360
+			$new_def = clone $module->info[$name];
361
+
362
+			if (!$def && $new_def->standalone) {
363
+				$def = $new_def;
364
+			} elseif ($def) {
365
+				// This will occur even if $new_def is standalone. In practice,
366
+				// this will usually result in a full replacement.
367
+				$def->mergeIn($new_def);
368
+			} else {
369
+				// :TODO:
370
+				// non-standalone definitions that don't have a standalone
371
+				// to merge into could be deferred to the end
372
+				// HOWEVER, it is perfectly valid for a non-standalone
373
+				// definition to lack a standalone definition, even
374
+				// after all processing: this allows us to safely
375
+				// specify extra attributes for elements that may not be
376
+				// enabled all in one place.  In particular, this might
377
+				// be the case for trusted elements.  WARNING: care must
378
+				// be taken that the /extra/ definitions are all safe.
379
+				continue;
380
+			}
381
+
382
+			// attribute value expansions
383
+			$this->attrCollections->performInclusions($def->attr);
384
+			$this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
385
+
386
+			// descendants_are_inline, for ChildDef_Chameleon
387
+			if (is_string($def->content_model) &&
388
+				strpos($def->content_model, 'Inline') !== false) {
389
+				if ($name != 'del' && $name != 'ins') {
390
+					// this is for you, ins/del
391
+					$def->descendants_are_inline = true;
392
+				}
393
+			}
394
+
395
+			$this->contentSets->generateChildDef($def, $module);
396
+		}
397
+
398
+		// This can occur if there is a blank definition, but no base to
399
+		// mix it in with
400
+		if (!$def) return false;
401
+
402
+		// add information on required attributes
403
+		foreach ($def->attr as $attr_name => $attr_def) {
404
+			if ($attr_def->required) {
405
+				$def->required_attr[] = $attr_name;
406
+			}
407
+		}
408
+
409
+		return $def;
410
+
411
+	}
412 412
 
413 413
 }
414 414
 
Please login to merge, or discard this patch.
Braces   +24 added lines, -8 removed lines patch added patch discarded remove patch
@@ -182,7 +182,9 @@  discard block
 block discarded – undo
182 182
      */
183 183
     public function addModule($module) {
184 184
         $this->registerModule($module);
185
-        if (is_object($module)) $module = $module->name;
185
+        if (is_object($module)) {
186
+        	$module = $module->name;
187
+        }
186 188
         $this->userModules[] = $module;
187 189
     }
188 190
 
@@ -213,8 +215,12 @@  discard block
 block discarded – undo
213 215
 
214 216
         if (is_array($lookup)) {
215 217
             foreach ($modules as $k => $m) {
216
-                if (isset($special_cases[$m])) continue;
217
-                if (!isset($lookup[$m])) unset($modules[$k]);
218
+                if (isset($special_cases[$m])) {
219
+                	continue;
220
+                }
221
+                if (!isset($lookup[$m])) {
222
+                	unset($modules[$k]);
223
+                }
218 224
             }
219 225
         }
220 226
 
@@ -305,9 +311,13 @@  discard block
 block discarded – undo
305 311
 
306 312
         $elements = array();
307 313
         foreach ($this->modules as $module) {
308
-            if (!$this->trusted && !$module->safe) continue;
314
+            if (!$this->trusted && !$module->safe) {
315
+            	continue;
316
+            }
309 317
             foreach ($module->info as $name => $v) {
310
-                if (isset($elements[$name])) continue;
318
+                if (isset($elements[$name])) {
319
+                	continue;
320
+                }
311 321
                 $elements[$name] = $this->getElement($name);
312 322
             }
313 323
         }
@@ -315,7 +325,9 @@  discard block
 block discarded – undo
315 325
         // remove dud elements, this happens when an element that
316 326
         // appeared to be safe actually wasn't
317 327
         foreach ($elements as $n => $v) {
318
-            if ($v === false) unset($elements[$n]);
328
+            if ($v === false) {
329
+            	unset($elements[$n]);
330
+            }
319 331
         }
320 332
 
321 333
         return $elements;
@@ -340,7 +352,9 @@  discard block
 block discarded – undo
340 352
 
341 353
         // setup global state variables
342 354
         $def = false;
343
-        if ($trusted === null) $trusted = $this->trusted;
355
+        if ($trusted === null) {
356
+        	$trusted = $this->trusted;
357
+        }
344 358
 
345 359
         // iterate through each module that has registered itself to this
346 360
         // element
@@ -397,7 +411,9 @@  discard block
 block discarded – undo
397 411
 
398 412
         // This can occur if there is a blank definition, but no base to
399 413
         // mix it in with
400
-        if (!$def) return false;
414
+        if (!$def) {
415
+        	return false;
416
+        }
401 417
 
402 418
         // add information on required attributes
403 419
         foreach ($def->attr as $attr_name => $attr_def) {
Please login to merge, or discard this patch.
Spacing   +6 added lines, -6 removed lines patch added patch discarded remove patch
@@ -47,7 +47,7 @@  discard block
 block discarded – undo
47 47
     /** List of prefixes we should use for registering small names */
48 48
     public $prefixes = array('HTMLPurifier_HTMLModule_');
49 49
 
50
-    public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
50
+    public $contentSets; /**< Instance of HTMLPurifier_ContentSets */
51 51
     public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
52 52
 
53 53
     /** If set to true, unsafe elements and attributes will be allowed */
@@ -150,7 +150,7 @@  discard block
 block discarded – undo
150 150
             $original_module = $module;
151 151
             $ok = false;
152 152
             foreach ($this->prefixes as $prefix) {
153
-                $module = $prefix . $original_module;
153
+                $module = $prefix.$original_module;
154 154
                 if (class_exists($module)) {
155 155
                     $ok = true;
156 156
                     break;
@@ -159,7 +159,7 @@  discard block
 block discarded – undo
159 159
             if (!$ok) {
160 160
                 $module = $original_module;
161 161
                 if (!class_exists($module)) {
162
-                    trigger_error($original_module . ' module does not exist',
162
+                    trigger_error($original_module.' module does not exist',
163 163
                         E_USER_ERROR);
164 164
                     return;
165 165
                 }
@@ -167,11 +167,11 @@  discard block
 block discarded – undo
167 167
             $module = new $module();
168 168
         }
169 169
         if (empty($module->name)) {
170
-            trigger_error('Module instance of ' . get_class($module) . ' must have name');
170
+            trigger_error('Module instance of '.get_class($module).' must have name');
171 171
             return;
172 172
         }
173 173
         if (!$overload && isset($this->registeredModules[$module->name])) {
174
-            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
174
+            trigger_error('Overloading '.$module->name.' without explicit overload parameter', E_USER_WARNING);
175 175
         }
176 176
         $this->registeredModules[$module->name] = $module;
177 177
     }
@@ -344,7 +344,7 @@  discard block
 block discarded – undo
344 344
 
345 345
         // iterate through each module that has registered itself to this
346 346
         // element
347
-        foreach($this->elementLookup[$name] as $module_name) {
347
+        foreach ($this->elementLookup[$name] as $module_name) {
348 348
 
349 349
             $module = $this->modules[$module_name];
350 350
 
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/IDAccumulator.php 3 patches
Doc Comments   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -19,7 +19,7 @@
 block discarded – undo
19 19
      * Builds an IDAccumulator, also initializing the default blacklist
20 20
      * @param $config Instance of HTMLPurifier_Config
21 21
      * @param $context Instance of HTMLPurifier_Context
22
-     * @return Fully initialized HTMLPurifier_IDAccumulator
22
+     * @return HTMLPurifier_IDAccumulator initialized HTMLPurifier_IDAccumulator
23 23
      */
24 24
     public static function build($config, $context) {
25 25
         $id_accumulator = new HTMLPurifier_IDAccumulator();
Please login to merge, or discard this patch.
Indentation   +35 added lines, -35 removed lines patch added patch discarded remove patch
@@ -9,44 +9,44 @@
 block discarded – undo
9 9
 class HTMLPurifier_IDAccumulator
10 10
 {
11 11
 
12
-    /**
13
-     * Lookup table of IDs we've accumulated.
14
-     * @public
15
-     */
16
-    public $ids = array();
12
+	/**
13
+	 * Lookup table of IDs we've accumulated.
14
+	 * @public
15
+	 */
16
+	public $ids = array();
17 17
 
18
-    /**
19
-     * Builds an IDAccumulator, also initializing the default blacklist
20
-     * @param $config Instance of HTMLPurifier_Config
21
-     * @param $context Instance of HTMLPurifier_Context
22
-     * @return Fully initialized HTMLPurifier_IDAccumulator
23
-     */
24
-    public static function build($config, $context) {
25
-        $id_accumulator = new HTMLPurifier_IDAccumulator();
26
-        $id_accumulator->load($config->get('Attr.IDBlacklist'));
27
-        return $id_accumulator;
28
-    }
18
+	/**
19
+	 * Builds an IDAccumulator, also initializing the default blacklist
20
+	 * @param $config Instance of HTMLPurifier_Config
21
+	 * @param $context Instance of HTMLPurifier_Context
22
+	 * @return Fully initialized HTMLPurifier_IDAccumulator
23
+	 */
24
+	public static function build($config, $context) {
25
+		$id_accumulator = new HTMLPurifier_IDAccumulator();
26
+		$id_accumulator->load($config->get('Attr.IDBlacklist'));
27
+		return $id_accumulator;
28
+	}
29 29
 
30
-    /**
31
-     * Add an ID to the lookup table.
32
-     * @param $id ID to be added.
33
-     * @return Bool status, true if success, false if there's a dupe
34
-     */
35
-    public function add($id) {
36
-        if (isset($this->ids[$id])) return false;
37
-        return $this->ids[$id] = true;
38
-    }
30
+	/**
31
+	 * Add an ID to the lookup table.
32
+	 * @param $id ID to be added.
33
+	 * @return Bool status, true if success, false if there's a dupe
34
+	 */
35
+	public function add($id) {
36
+		if (isset($this->ids[$id])) return false;
37
+		return $this->ids[$id] = true;
38
+	}
39 39
 
40
-    /**
41
-     * Load a list of IDs into the lookup table
42
-     * @param $array_of_ids Array of IDs to load
43
-     * @note This function doesn't care about duplicates
44
-     */
45
-    public function load($array_of_ids) {
46
-        foreach ($array_of_ids as $id) {
47
-            $this->ids[$id] = true;
48
-        }
49
-    }
40
+	/**
41
+	 * Load a list of IDs into the lookup table
42
+	 * @param $array_of_ids Array of IDs to load
43
+	 * @note This function doesn't care about duplicates
44
+	 */
45
+	public function load($array_of_ids) {
46
+		foreach ($array_of_ids as $id) {
47
+			$this->ids[$id] = true;
48
+		}
49
+	}
50 50
 
51 51
 }
52 52
 
Please login to merge, or discard this patch.
Braces   +3 added lines, -1 removed lines patch added patch discarded remove patch
@@ -33,7 +33,9 @@
 block discarded – undo
33 33
      * @return Bool status, true if success, false if there's a dupe
34 34
      */
35 35
     public function add($id) {
36
-        if (isset($this->ids[$id])) return false;
36
+        if (isset($this->ids[$id])) {
37
+        	return false;
38
+        }
37 39
         return $this->ids[$id] = true;
38 40
     }
39 41
 
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/Injector.php 4 patches
Doc Comments   +3 added lines, -2 removed lines patch added patch discarded remove patch
@@ -64,6 +64,7 @@  discard block
 block discarded – undo
64 64
      * result in infinite loops if not used carefully.
65 65
      * @warning HTML Purifier will prevent you from fast-forwarding with this
66 66
      *          function.
67
+     * @param integer $index
67 68
      */
68 69
     public function rewind($index) {
69 70
         $this->rewind = $index;
@@ -123,8 +124,8 @@  discard block
 block discarded – undo
123 124
 
124 125
     /**
125 126
      * Tests if the context node allows a certain element
126
-     * @param $name Name of element to test for
127
-     * @return True if element is allowed, false if it is not
127
+     * @param string $name Name of element to test for
128
+     * @return boolean if element is allowed, false if it is not
128 129
      */
129 130
     public function allowsElement($name) {
130 131
         if (!empty($this->currentNesting)) {
Please login to merge, or discard this patch.
Indentation   +216 added lines, -216 removed lines patch added patch discarded remove patch
@@ -16,222 +16,222 @@
 block discarded – undo
16 16
 abstract class HTMLPurifier_Injector
17 17
 {
18 18
 
19
-    /**
20
-     * Advisory name of injector, this is for friendly error messages
21
-     */
22
-    public $name;
23
-
24
-    /**
25
-     * Instance of HTMLPurifier_HTMLDefinition
26
-     */
27
-    protected $htmlDefinition;
28
-
29
-    /**
30
-     * Reference to CurrentNesting variable in Context. This is an array
31
-     * list of tokens that we are currently "inside"
32
-     */
33
-    protected $currentNesting;
34
-
35
-    /**
36
-     * Reference to InputTokens variable in Context. This is an array
37
-     * list of the input tokens that are being processed.
38
-     */
39
-    protected $inputTokens;
40
-
41
-    /**
42
-     * Reference to InputIndex variable in Context. This is an integer
43
-     * array index for $this->inputTokens that indicates what token
44
-     * is currently being processed.
45
-     */
46
-    protected $inputIndex;
47
-
48
-    /**
49
-     * Array of elements and attributes this injector creates and therefore
50
-     * need to be allowed by the definition. Takes form of
51
-     * array('element' => array('attr', 'attr2'), 'element2')
52
-     */
53
-    public $needed = array();
54
-
55
-    /**
56
-     * Index of inputTokens to rewind to.
57
-     */
58
-    protected $rewind = false;
59
-
60
-    /**
61
-     * Rewind to a spot to re-perform processing. This is useful if you
62
-     * deleted a node, and now need to see if this change affected any
63
-     * earlier nodes. Rewinding does not affect other injectors, and can
64
-     * result in infinite loops if not used carefully.
65
-     * @warning HTML Purifier will prevent you from fast-forwarding with this
66
-     *          function.
67
-     */
68
-    public function rewind($index) {
69
-        $this->rewind = $index;
70
-    }
71
-
72
-    /**
73
-     * Retrieves rewind, and then unsets it.
74
-     */
75
-    public function getRewind() {
76
-        $r = $this->rewind;
77
-        $this->rewind = false;
78
-        return $r;
79
-    }
80
-
81
-    /**
82
-     * Prepares the injector by giving it the config and context objects:
83
-     * this allows references to important variables to be made within
84
-     * the injector. This function also checks if the HTML environment
85
-     * will work with the Injector (see checkNeeded()).
86
-     * @param $config Instance of HTMLPurifier_Config
87
-     * @param $context Instance of HTMLPurifier_Context
88
-     * @return Boolean false if success, string of missing needed element/attribute if failure
89
-     */
90
-    public function prepare($config, $context) {
91
-        $this->htmlDefinition = $config->getHTMLDefinition();
92
-        // Even though this might fail, some unit tests ignore this and
93
-        // still test checkNeeded, so be careful. Maybe get rid of that
94
-        // dependency.
95
-        $result = $this->checkNeeded($config);
96
-        if ($result !== false) return $result;
97
-        $this->currentNesting =& $context->get('CurrentNesting');
98
-        $this->inputTokens    =& $context->get('InputTokens');
99
-        $this->inputIndex     =& $context->get('InputIndex');
100
-        return false;
101
-    }
102
-
103
-    /**
104
-     * This function checks if the HTML environment
105
-     * will work with the Injector: if p tags are not allowed, the
106
-     * Auto-Paragraphing injector should not be enabled.
107
-     * @param $config Instance of HTMLPurifier_Config
108
-     * @param $context Instance of HTMLPurifier_Context
109
-     * @return Boolean false if success, string of missing needed element/attribute if failure
110
-     */
111
-    public function checkNeeded($config) {
112
-        $def = $config->getHTMLDefinition();
113
-        foreach ($this->needed as $element => $attributes) {
114
-            if (is_int($element)) $element = $attributes;
115
-            if (!isset($def->info[$element])) return $element;
116
-            if (!is_array($attributes)) continue;
117
-            foreach ($attributes as $name) {
118
-                if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
119
-            }
120
-        }
121
-        return false;
122
-    }
123
-
124
-    /**
125
-     * Tests if the context node allows a certain element
126
-     * @param $name Name of element to test for
127
-     * @return True if element is allowed, false if it is not
128
-     */
129
-    public function allowsElement($name) {
130
-        if (!empty($this->currentNesting)) {
131
-            $parent_token = array_pop($this->currentNesting);
132
-            $this->currentNesting[] = $parent_token;
133
-            $parent = $this->htmlDefinition->info[$parent_token->name];
134
-        } else {
135
-            $parent = $this->htmlDefinition->info_parent_def;
136
-        }
137
-        if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
138
-            return false;
139
-        }
140
-        // check for exclusion
141
-        for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142
-            $node = $this->currentNesting[$i];
143
-            $def  = $this->htmlDefinition->info[$node->name];
144
-            if (isset($def->excludes[$name])) return false;
145
-        }
146
-        return true;
147
-    }
148
-
149
-    /**
150
-     * Iterator function, which starts with the next token and continues until
151
-     * you reach the end of the input tokens.
152
-     * @warning Please prevent previous references from interfering with this
153
-     *          functions by setting $i = null beforehand!
154
-     * @param &$i Current integer index variable for inputTokens
155
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156
-     */
157
-    protected function forward(&$i, &$current) {
158
-        if ($i === null) $i = $this->inputIndex + 1;
159
-        else $i++;
160
-        if (!isset($this->inputTokens[$i])) return false;
161
-        $current = $this->inputTokens[$i];
162
-        return true;
163
-    }
164
-
165
-    /**
166
-     * Similar to _forward, but accepts a third parameter $nesting (which
167
-     * should be initialized at 0) and stops when we hit the end tag
168
-     * for the node $this->inputIndex starts in.
169
-     */
170
-    protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171
-        $result = $this->forward($i, $current);
172
-        if (!$result) return false;
173
-        if ($nesting === null) $nesting = 0;
174
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175
-        elseif ($current instanceof HTMLPurifier_Token_End) {
176
-            if ($nesting <= 0) return false;
177
-            $nesting--;
178
-        }
179
-        return true;
180
-    }
181
-
182
-    /**
183
-     * Iterator function, starts with the previous token and continues until
184
-     * you reach the beginning of input tokens.
185
-     * @warning Please prevent previous references from interfering with this
186
-     *          functions by setting $i = null beforehand!
187
-     * @param &$i Current integer index variable for inputTokens
188
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189
-     */
190
-    protected function backward(&$i, &$current) {
191
-        if ($i === null) $i = $this->inputIndex - 1;
192
-        else $i--;
193
-        if ($i < 0) return false;
194
-        $current = $this->inputTokens[$i];
195
-        return true;
196
-    }
197
-
198
-    /**
199
-     * Initializes the iterator at the current position. Use in a do {} while;
200
-     * loop to force the _forward and _backward functions to start at the
201
-     * current location.
202
-     * @warning Please prevent previous references from interfering with this
203
-     *          functions by setting $i = null beforehand!
204
-     * @param &$i Current integer index variable for inputTokens
205
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206
-     */
207
-    protected function current(&$i, &$current) {
208
-        if ($i === null) $i = $this->inputIndex;
209
-        $current = $this->inputTokens[$i];
210
-    }
211
-
212
-    /**
213
-     * Handler that is called when a text token is processed
214
-     */
215
-    public function handleText(&$token) {}
216
-
217
-    /**
218
-     * Handler that is called when a start or empty token is processed
219
-     */
220
-    public function handleElement(&$token) {}
221
-
222
-    /**
223
-     * Handler that is called when an end token is processed
224
-     */
225
-    public function handleEnd(&$token) {
226
-        $this->notifyEnd($token);
227
-    }
228
-
229
-    /**
230
-     * Notifier that is called when an end token is processed
231
-     * @note This differs from handlers in that the token is read-only
232
-     * @deprecated
233
-     */
234
-    public function notifyEnd($token) {}
19
+	/**
20
+	 * Advisory name of injector, this is for friendly error messages
21
+	 */
22
+	public $name;
23
+
24
+	/**
25
+	 * Instance of HTMLPurifier_HTMLDefinition
26
+	 */
27
+	protected $htmlDefinition;
28
+
29
+	/**
30
+	 * Reference to CurrentNesting variable in Context. This is an array
31
+	 * list of tokens that we are currently "inside"
32
+	 */
33
+	protected $currentNesting;
34
+
35
+	/**
36
+	 * Reference to InputTokens variable in Context. This is an array
37
+	 * list of the input tokens that are being processed.
38
+	 */
39
+	protected $inputTokens;
40
+
41
+	/**
42
+	 * Reference to InputIndex variable in Context. This is an integer
43
+	 * array index for $this->inputTokens that indicates what token
44
+	 * is currently being processed.
45
+	 */
46
+	protected $inputIndex;
47
+
48
+	/**
49
+	 * Array of elements and attributes this injector creates and therefore
50
+	 * need to be allowed by the definition. Takes form of
51
+	 * array('element' => array('attr', 'attr2'), 'element2')
52
+	 */
53
+	public $needed = array();
54
+
55
+	/**
56
+	 * Index of inputTokens to rewind to.
57
+	 */
58
+	protected $rewind = false;
59
+
60
+	/**
61
+	 * Rewind to a spot to re-perform processing. This is useful if you
62
+	 * deleted a node, and now need to see if this change affected any
63
+	 * earlier nodes. Rewinding does not affect other injectors, and can
64
+	 * result in infinite loops if not used carefully.
65
+	 * @warning HTML Purifier will prevent you from fast-forwarding with this
66
+	 *          function.
67
+	 */
68
+	public function rewind($index) {
69
+		$this->rewind = $index;
70
+	}
71
+
72
+	/**
73
+	 * Retrieves rewind, and then unsets it.
74
+	 */
75
+	public function getRewind() {
76
+		$r = $this->rewind;
77
+		$this->rewind = false;
78
+		return $r;
79
+	}
80
+
81
+	/**
82
+	 * Prepares the injector by giving it the config and context objects:
83
+	 * this allows references to important variables to be made within
84
+	 * the injector. This function also checks if the HTML environment
85
+	 * will work with the Injector (see checkNeeded()).
86
+	 * @param $config Instance of HTMLPurifier_Config
87
+	 * @param $context Instance of HTMLPurifier_Context
88
+	 * @return Boolean false if success, string of missing needed element/attribute if failure
89
+	 */
90
+	public function prepare($config, $context) {
91
+		$this->htmlDefinition = $config->getHTMLDefinition();
92
+		// Even though this might fail, some unit tests ignore this and
93
+		// still test checkNeeded, so be careful. Maybe get rid of that
94
+		// dependency.
95
+		$result = $this->checkNeeded($config);
96
+		if ($result !== false) return $result;
97
+		$this->currentNesting =& $context->get('CurrentNesting');
98
+		$this->inputTokens    =& $context->get('InputTokens');
99
+		$this->inputIndex     =& $context->get('InputIndex');
100
+		return false;
101
+	}
102
+
103
+	/**
104
+	 * This function checks if the HTML environment
105
+	 * will work with the Injector: if p tags are not allowed, the
106
+	 * Auto-Paragraphing injector should not be enabled.
107
+	 * @param $config Instance of HTMLPurifier_Config
108
+	 * @param $context Instance of HTMLPurifier_Context
109
+	 * @return Boolean false if success, string of missing needed element/attribute if failure
110
+	 */
111
+	public function checkNeeded($config) {
112
+		$def = $config->getHTMLDefinition();
113
+		foreach ($this->needed as $element => $attributes) {
114
+			if (is_int($element)) $element = $attributes;
115
+			if (!isset($def->info[$element])) return $element;
116
+			if (!is_array($attributes)) continue;
117
+			foreach ($attributes as $name) {
118
+				if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
119
+			}
120
+		}
121
+		return false;
122
+	}
123
+
124
+	/**
125
+	 * Tests if the context node allows a certain element
126
+	 * @param $name Name of element to test for
127
+	 * @return True if element is allowed, false if it is not
128
+	 */
129
+	public function allowsElement($name) {
130
+		if (!empty($this->currentNesting)) {
131
+			$parent_token = array_pop($this->currentNesting);
132
+			$this->currentNesting[] = $parent_token;
133
+			$parent = $this->htmlDefinition->info[$parent_token->name];
134
+		} else {
135
+			$parent = $this->htmlDefinition->info_parent_def;
136
+		}
137
+		if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
138
+			return false;
139
+		}
140
+		// check for exclusion
141
+		for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142
+			$node = $this->currentNesting[$i];
143
+			$def  = $this->htmlDefinition->info[$node->name];
144
+			if (isset($def->excludes[$name])) return false;
145
+		}
146
+		return true;
147
+	}
148
+
149
+	/**
150
+	 * Iterator function, which starts with the next token and continues until
151
+	 * you reach the end of the input tokens.
152
+	 * @warning Please prevent previous references from interfering with this
153
+	 *          functions by setting $i = null beforehand!
154
+	 * @param &$i Current integer index variable for inputTokens
155
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156
+	 */
157
+	protected function forward(&$i, &$current) {
158
+		if ($i === null) $i = $this->inputIndex + 1;
159
+		else $i++;
160
+		if (!isset($this->inputTokens[$i])) return false;
161
+		$current = $this->inputTokens[$i];
162
+		return true;
163
+	}
164
+
165
+	/**
166
+	 * Similar to _forward, but accepts a third parameter $nesting (which
167
+	 * should be initialized at 0) and stops when we hit the end tag
168
+	 * for the node $this->inputIndex starts in.
169
+	 */
170
+	protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171
+		$result = $this->forward($i, $current);
172
+		if (!$result) return false;
173
+		if ($nesting === null) $nesting = 0;
174
+		if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175
+		elseif ($current instanceof HTMLPurifier_Token_End) {
176
+			if ($nesting <= 0) return false;
177
+			$nesting--;
178
+		}
179
+		return true;
180
+	}
181
+
182
+	/**
183
+	 * Iterator function, starts with the previous token and continues until
184
+	 * you reach the beginning of input tokens.
185
+	 * @warning Please prevent previous references from interfering with this
186
+	 *          functions by setting $i = null beforehand!
187
+	 * @param &$i Current integer index variable for inputTokens
188
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189
+	 */
190
+	protected function backward(&$i, &$current) {
191
+		if ($i === null) $i = $this->inputIndex - 1;
192
+		else $i--;
193
+		if ($i < 0) return false;
194
+		$current = $this->inputTokens[$i];
195
+		return true;
196
+	}
197
+
198
+	/**
199
+	 * Initializes the iterator at the current position. Use in a do {} while;
200
+	 * loop to force the _forward and _backward functions to start at the
201
+	 * current location.
202
+	 * @warning Please prevent previous references from interfering with this
203
+	 *          functions by setting $i = null beforehand!
204
+	 * @param &$i Current integer index variable for inputTokens
205
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206
+	 */
207
+	protected function current(&$i, &$current) {
208
+		if ($i === null) $i = $this->inputIndex;
209
+		$current = $this->inputTokens[$i];
210
+	}
211
+
212
+	/**
213
+	 * Handler that is called when a text token is processed
214
+	 */
215
+	public function handleText(&$token) {}
216
+
217
+	/**
218
+	 * Handler that is called when a start or empty token is processed
219
+	 */
220
+	public function handleElement(&$token) {}
221
+
222
+	/**
223
+	 * Handler that is called when an end token is processed
224
+	 */
225
+	public function handleEnd(&$token) {
226
+		$this->notifyEnd($token);
227
+	}
228
+
229
+	/**
230
+	 * Notifier that is called when an end token is processed
231
+	 * @note This differs from handlers in that the token is read-only
232
+	 * @deprecated
233
+	 */
234
+	public function notifyEnd($token) {}
235 235
 
236 236
 
237 237
 }
Please login to merge, or discard this patch.
Spacing   +4 added lines, -4 removed lines patch added patch discarded remove patch
@@ -94,9 +94,9 @@  discard block
 block discarded – undo
94 94
         // dependency.
95 95
         $result = $this->checkNeeded($config);
96 96
         if ($result !== false) return $result;
97
-        $this->currentNesting =& $context->get('CurrentNesting');
98
-        $this->inputTokens    =& $context->get('InputTokens');
99
-        $this->inputIndex     =& $context->get('InputIndex');
97
+        $this->currentNesting = & $context->get('CurrentNesting');
98
+        $this->inputTokens    = & $context->get('InputTokens');
99
+        $this->inputIndex     = & $context->get('InputIndex');
100 100
         return false;
101 101
     }
102 102
 
@@ -171,7 +171,7 @@  discard block
 block discarded – undo
171 171
         $result = $this->forward($i, $current);
172 172
         if (!$result) return false;
173 173
         if ($nesting === null) $nesting = 0;
174
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
174
+        if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175 175
         elseif ($current instanceof HTMLPurifier_Token_End) {
176 176
             if ($nesting <= 0) return false;
177 177
             $nesting--;
Please login to merge, or discard this patch.
Braces   +49 added lines, -18 removed lines patch added patch discarded remove patch
@@ -93,7 +93,9 @@  discard block
 block discarded – undo
93 93
         // still test checkNeeded, so be careful. Maybe get rid of that
94 94
         // dependency.
95 95
         $result = $this->checkNeeded($config);
96
-        if ($result !== false) return $result;
96
+        if ($result !== false) {
97
+        	return $result;
98
+        }
97 99
         $this->currentNesting =& $context->get('CurrentNesting');
98 100
         $this->inputTokens    =& $context->get('InputTokens');
99 101
         $this->inputIndex     =& $context->get('InputIndex');
@@ -111,11 +113,19 @@  discard block
 block discarded – undo
111 113
     public function checkNeeded($config) {
112 114
         $def = $config->getHTMLDefinition();
113 115
         foreach ($this->needed as $element => $attributes) {
114
-            if (is_int($element)) $element = $attributes;
115
-            if (!isset($def->info[$element])) return $element;
116
-            if (!is_array($attributes)) continue;
116
+            if (is_int($element)) {
117
+            	$element = $attributes;
118
+            }
119
+            if (!isset($def->info[$element])) {
120
+            	return $element;
121
+            }
122
+            if (!is_array($attributes)) {
123
+            	continue;
124
+            }
117 125
             foreach ($attributes as $name) {
118
-                if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
126
+                if (!isset($def->info[$element]->attr[$name])) {
127
+                	return "$element.$name";
128
+                }
119 129
             }
120 130
         }
121 131
         return false;
@@ -141,7 +151,9 @@  discard block
 block discarded – undo
141 151
         for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142 152
             $node = $this->currentNesting[$i];
143 153
             $def  = $this->htmlDefinition->info[$node->name];
144
-            if (isset($def->excludes[$name])) return false;
154
+            if (isset($def->excludes[$name])) {
155
+            	return false;
156
+            }
145 157
         }
146 158
         return true;
147 159
     }
@@ -155,9 +167,14 @@  discard block
 block discarded – undo
155 167
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156 168
      */
157 169
     protected function forward(&$i, &$current) {
158
-        if ($i === null) $i = $this->inputIndex + 1;
159
-        else $i++;
160
-        if (!isset($this->inputTokens[$i])) return false;
170
+        if ($i === null) {
171
+        	$i = $this->inputIndex + 1;
172
+        } else {
173
+        	$i++;
174
+        }
175
+        if (!isset($this->inputTokens[$i])) {
176
+        	return false;
177
+        }
161 178
         $current = $this->inputTokens[$i];
162 179
         return true;
163 180
     }
@@ -169,11 +186,18 @@  discard block
 block discarded – undo
169 186
      */
170 187
     protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171 188
         $result = $this->forward($i, $current);
172
-        if (!$result) return false;
173
-        if ($nesting === null) $nesting = 0;
174
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175
-        elseif ($current instanceof HTMLPurifier_Token_End) {
176
-            if ($nesting <= 0) return false;
189
+        if (!$result) {
190
+        	return false;
191
+        }
192
+        if ($nesting === null) {
193
+        	$nesting = 0;
194
+        }
195
+        if     ($current instanceof HTMLPurifier_Token_Start) {
196
+        	$nesting++;
197
+        } elseif ($current instanceof HTMLPurifier_Token_End) {
198
+            if ($nesting <= 0) {
199
+            	return false;
200
+            }
177 201
             $nesting--;
178 202
         }
179 203
         return true;
@@ -188,9 +212,14 @@  discard block
 block discarded – undo
188 212
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189 213
      */
190 214
     protected function backward(&$i, &$current) {
191
-        if ($i === null) $i = $this->inputIndex - 1;
192
-        else $i--;
193
-        if ($i < 0) return false;
215
+        if ($i === null) {
216
+        	$i = $this->inputIndex - 1;
217
+        } else {
218
+        	$i--;
219
+        }
220
+        if ($i < 0) {
221
+        	return false;
222
+        }
194 223
         $current = $this->inputTokens[$i];
195 224
         return true;
196 225
     }
@@ -205,7 +234,9 @@  discard block
 block discarded – undo
205 234
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206 235
      */
207 236
     protected function current(&$i, &$current) {
208
-        if ($i === null) $i = $this->inputIndex;
237
+        if ($i === null) {
238
+        	$i = $this->inputIndex;
239
+        }
209 240
         $current = $this->inputTokens[$i];
210 241
     }
211 242
 
Please login to merge, or discard this patch.