@@ -9,6 +9,9 @@ |
||
9 | 9 | class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 |
10 | 10 | { |
11 | 11 | |
12 | + /** |
|
13 | + * @param string $aIP |
|
14 | + */ |
|
12 | 15 | public function validate($aIP, $config, $context) { |
13 | 16 | |
14 | 17 | if (!$this->ip4) $this->_loadRegex(); |
@@ -9,90 +9,90 @@ |
||
9 | 9 | class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 |
10 | 10 | { |
11 | 11 | |
12 | - public function validate($aIP, $config, $context) { |
|
13 | - |
|
14 | - if (!$this->ip4) $this->_loadRegex(); |
|
15 | - |
|
16 | - $original = $aIP; |
|
17 | - |
|
18 | - $hex = '[0-9a-fA-F]'; |
|
19 | - $blk = '(?:' . $hex . '{1,4})'; |
|
20 | - $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
21 | - |
|
22 | - // prefix check |
|
23 | - if (strpos($aIP, '/') !== false) |
|
24 | - { |
|
25 | - if (preg_match('#' . $pre . '$#s', $aIP, $find)) |
|
26 | - { |
|
27 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
28 | - unset($find); |
|
29 | - } |
|
30 | - else |
|
31 | - { |
|
32 | - return false; |
|
33 | - } |
|
34 | - } |
|
35 | - |
|
36 | - // IPv4-compatiblity check |
|
37 | - if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) |
|
38 | - { |
|
39 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
40 | - $ip = explode('.', $find[0]); |
|
41 | - $ip = array_map('dechex', $ip); |
|
42 | - $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; |
|
43 | - unset($find, $ip); |
|
44 | - } |
|
45 | - |
|
46 | - // compression check |
|
47 | - $aIP = explode('::', $aIP); |
|
48 | - $c = count($aIP); |
|
49 | - if ($c > 2) |
|
50 | - { |
|
51 | - return false; |
|
52 | - } |
|
53 | - elseif ($c == 2) |
|
54 | - { |
|
55 | - list($first, $second) = $aIP; |
|
56 | - $first = explode(':', $first); |
|
57 | - $second = explode(':', $second); |
|
58 | - |
|
59 | - if (count($first) + count($second) > 8) |
|
60 | - { |
|
61 | - return false; |
|
62 | - } |
|
63 | - |
|
64 | - while(count($first) < 8) |
|
65 | - { |
|
66 | - array_push($first, '0'); |
|
67 | - } |
|
68 | - |
|
69 | - array_splice($first, 8 - count($second), 8, $second); |
|
70 | - $aIP = $first; |
|
71 | - unset($first,$second); |
|
72 | - } |
|
73 | - else |
|
74 | - { |
|
75 | - $aIP = explode(':', $aIP[0]); |
|
76 | - } |
|
77 | - $c = count($aIP); |
|
78 | - |
|
79 | - if ($c != 8) |
|
80 | - { |
|
81 | - return false; |
|
82 | - } |
|
83 | - |
|
84 | - // All the pieces should be 16-bit hex strings. Are they? |
|
85 | - foreach ($aIP as $piece) |
|
86 | - { |
|
87 | - if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) |
|
88 | - { |
|
89 | - return false; |
|
90 | - } |
|
91 | - } |
|
92 | - |
|
93 | - return $original; |
|
94 | - |
|
95 | - } |
|
12 | + public function validate($aIP, $config, $context) { |
|
13 | + |
|
14 | + if (!$this->ip4) $this->_loadRegex(); |
|
15 | + |
|
16 | + $original = $aIP; |
|
17 | + |
|
18 | + $hex = '[0-9a-fA-F]'; |
|
19 | + $blk = '(?:' . $hex . '{1,4})'; |
|
20 | + $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
21 | + |
|
22 | + // prefix check |
|
23 | + if (strpos($aIP, '/') !== false) |
|
24 | + { |
|
25 | + if (preg_match('#' . $pre . '$#s', $aIP, $find)) |
|
26 | + { |
|
27 | + $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
28 | + unset($find); |
|
29 | + } |
|
30 | + else |
|
31 | + { |
|
32 | + return false; |
|
33 | + } |
|
34 | + } |
|
35 | + |
|
36 | + // IPv4-compatiblity check |
|
37 | + if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) |
|
38 | + { |
|
39 | + $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
40 | + $ip = explode('.', $find[0]); |
|
41 | + $ip = array_map('dechex', $ip); |
|
42 | + $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; |
|
43 | + unset($find, $ip); |
|
44 | + } |
|
45 | + |
|
46 | + // compression check |
|
47 | + $aIP = explode('::', $aIP); |
|
48 | + $c = count($aIP); |
|
49 | + if ($c > 2) |
|
50 | + { |
|
51 | + return false; |
|
52 | + } |
|
53 | + elseif ($c == 2) |
|
54 | + { |
|
55 | + list($first, $second) = $aIP; |
|
56 | + $first = explode(':', $first); |
|
57 | + $second = explode(':', $second); |
|
58 | + |
|
59 | + if (count($first) + count($second) > 8) |
|
60 | + { |
|
61 | + return false; |
|
62 | + } |
|
63 | + |
|
64 | + while(count($first) < 8) |
|
65 | + { |
|
66 | + array_push($first, '0'); |
|
67 | + } |
|
68 | + |
|
69 | + array_splice($first, 8 - count($second), 8, $second); |
|
70 | + $aIP = $first; |
|
71 | + unset($first,$second); |
|
72 | + } |
|
73 | + else |
|
74 | + { |
|
75 | + $aIP = explode(':', $aIP[0]); |
|
76 | + } |
|
77 | + $c = count($aIP); |
|
78 | + |
|
79 | + if ($c != 8) |
|
80 | + { |
|
81 | + return false; |
|
82 | + } |
|
83 | + |
|
84 | + // All the pieces should be 16-bit hex strings. Are they? |
|
85 | + foreach ($aIP as $piece) |
|
86 | + { |
|
87 | + if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) |
|
88 | + { |
|
89 | + return false; |
|
90 | + } |
|
91 | + } |
|
92 | + |
|
93 | + return $original; |
|
94 | + |
|
95 | + } |
|
96 | 96 | |
97 | 97 | } |
98 | 98 |
@@ -11,7 +11,9 @@ discard block |
||
11 | 11 | |
12 | 12 | public function validate($aIP, $config, $context) { |
13 | 13 | |
14 | - if (!$this->ip4) $this->_loadRegex(); |
|
14 | + if (!$this->ip4) { |
|
15 | + $this->_loadRegex(); |
|
16 | + } |
|
15 | 17 | |
16 | 18 | $original = $aIP; |
17 | 19 | |
@@ -26,8 +28,7 @@ discard block |
||
26 | 28 | { |
27 | 29 | $aIP = substr($aIP, 0, 0-strlen($find[0])); |
28 | 30 | unset($find); |
29 | - } |
|
30 | - else |
|
31 | + } else |
|
31 | 32 | { |
32 | 33 | return false; |
33 | 34 | } |
@@ -49,8 +50,7 @@ discard block |
||
49 | 50 | if ($c > 2) |
50 | 51 | { |
51 | 52 | return false; |
52 | - } |
|
53 | - elseif ($c == 2) |
|
53 | + } elseif ($c == 2) |
|
54 | 54 | { |
55 | 55 | list($first, $second) = $aIP; |
56 | 56 | $first = explode(':', $first); |
@@ -69,8 +69,7 @@ discard block |
||
69 | 69 | array_splice($first, 8 - count($second), 8, $second); |
70 | 70 | $aIP = $first; |
71 | 71 | unset($first,$second); |
72 | - } |
|
73 | - else |
|
72 | + } else |
|
74 | 73 | { |
75 | 74 | $aIP = explode(':', $aIP[0]); |
76 | 75 | } |
@@ -16,15 +16,15 @@ discard block |
||
16 | 16 | $original = $aIP; |
17 | 17 | |
18 | 18 | $hex = '[0-9a-fA-F]'; |
19 | - $blk = '(?:' . $hex . '{1,4})'; |
|
20 | - $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
19 | + $blk = '(?:'.$hex.'{1,4})'; |
|
20 | + $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
21 | 21 | |
22 | 22 | // prefix check |
23 | 23 | if (strpos($aIP, '/') !== false) |
24 | 24 | { |
25 | - if (preg_match('#' . $pre . '$#s', $aIP, $find)) |
|
25 | + if (preg_match('#'.$pre.'$#s', $aIP, $find)) |
|
26 | 26 | { |
27 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
27 | + $aIP = substr($aIP, 0, 0 - strlen($find[0])); |
|
28 | 28 | unset($find); |
29 | 29 | } |
30 | 30 | else |
@@ -34,12 +34,12 @@ discard block |
||
34 | 34 | } |
35 | 35 | |
36 | 36 | // IPv4-compatiblity check |
37 | - if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) |
|
37 | + if (preg_match('#(?<=:'.')'.$this->ip4.'$#s', $aIP, $find)) |
|
38 | 38 | { |
39 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
39 | + $aIP = substr($aIP, 0, 0 - strlen($find[0])); |
|
40 | 40 | $ip = explode('.', $find[0]); |
41 | 41 | $ip = array_map('dechex', $ip); |
42 | - $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; |
|
42 | + $aIP .= $ip[0].$ip[1].':'.$ip[2].$ip[3]; |
|
43 | 43 | unset($find, $ip); |
44 | 44 | } |
45 | 45 | |
@@ -61,14 +61,14 @@ discard block |
||
61 | 61 | return false; |
62 | 62 | } |
63 | 63 | |
64 | - while(count($first) < 8) |
|
64 | + while (count($first) < 8) |
|
65 | 65 | { |
66 | 66 | array_push($first, '0'); |
67 | 67 | } |
68 | 68 | |
69 | 69 | array_splice($first, 8 - count($second), 8, $second); |
70 | 70 | $aIP = $first; |
71 | - unset($first,$second); |
|
71 | + unset($first, $second); |
|
72 | 72 | } |
73 | 73 | else |
74 | 74 | { |
@@ -17,10 +17,12 @@ |
||
17 | 17 | * Registers a doctype to the registry |
18 | 18 | * @note Accepts a fully-formed doctype object, or the |
19 | 19 | * parameters for constructing a doctype object |
20 | - * @param $doctype Name of doctype or literal doctype object |
|
20 | + * @param string $doctype Name of doctype or literal doctype object |
|
21 | 21 | * @param $modules Modules doctype will load |
22 | 22 | * @param $modules_for_modes Modules doctype will load for certain modes |
23 | 23 | * @param $aliases Alias names for doctype |
24 | + * @param string $dtd_public |
|
25 | + * @param string $dtd_system |
|
24 | 26 | * @return Editable registered doctype |
25 | 27 | */ |
26 | 28 | public function register($doctype, $xml = true, $modules = array(), |
@@ -3,100 +3,100 @@ |
||
3 | 3 | class HTMLPurifier_DoctypeRegistry |
4 | 4 | { |
5 | 5 | |
6 | - /** |
|
7 | - * Hash of doctype names to doctype objects |
|
8 | - */ |
|
9 | - protected $doctypes; |
|
6 | + /** |
|
7 | + * Hash of doctype names to doctype objects |
|
8 | + */ |
|
9 | + protected $doctypes; |
|
10 | 10 | |
11 | - /** |
|
12 | - * Lookup table of aliases to real doctype names |
|
13 | - */ |
|
14 | - protected $aliases; |
|
11 | + /** |
|
12 | + * Lookup table of aliases to real doctype names |
|
13 | + */ |
|
14 | + protected $aliases; |
|
15 | 15 | |
16 | - /** |
|
17 | - * Registers a doctype to the registry |
|
18 | - * @note Accepts a fully-formed doctype object, or the |
|
19 | - * parameters for constructing a doctype object |
|
20 | - * @param $doctype Name of doctype or literal doctype object |
|
21 | - * @param $modules Modules doctype will load |
|
22 | - * @param $modules_for_modes Modules doctype will load for certain modes |
|
23 | - * @param $aliases Alias names for doctype |
|
24 | - * @return Editable registered doctype |
|
25 | - */ |
|
26 | - public function register($doctype, $xml = true, $modules = array(), |
|
27 | - $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
|
28 | - ) { |
|
29 | - if (!is_array($modules)) $modules = array($modules); |
|
30 | - if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules); |
|
31 | - if (!is_array($aliases)) $aliases = array($aliases); |
|
32 | - if (!is_object($doctype)) { |
|
33 | - $doctype = new HTMLPurifier_Doctype( |
|
34 | - $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system |
|
35 | - ); |
|
36 | - } |
|
37 | - $this->doctypes[$doctype->name] = $doctype; |
|
38 | - $name = $doctype->name; |
|
39 | - // hookup aliases |
|
40 | - foreach ($doctype->aliases as $alias) { |
|
41 | - if (isset($this->doctypes[$alias])) continue; |
|
42 | - $this->aliases[$alias] = $name; |
|
43 | - } |
|
44 | - // remove old aliases |
|
45 | - if (isset($this->aliases[$name])) unset($this->aliases[$name]); |
|
46 | - return $doctype; |
|
47 | - } |
|
16 | + /** |
|
17 | + * Registers a doctype to the registry |
|
18 | + * @note Accepts a fully-formed doctype object, or the |
|
19 | + * parameters for constructing a doctype object |
|
20 | + * @param $doctype Name of doctype or literal doctype object |
|
21 | + * @param $modules Modules doctype will load |
|
22 | + * @param $modules_for_modes Modules doctype will load for certain modes |
|
23 | + * @param $aliases Alias names for doctype |
|
24 | + * @return Editable registered doctype |
|
25 | + */ |
|
26 | + public function register($doctype, $xml = true, $modules = array(), |
|
27 | + $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
|
28 | + ) { |
|
29 | + if (!is_array($modules)) $modules = array($modules); |
|
30 | + if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules); |
|
31 | + if (!is_array($aliases)) $aliases = array($aliases); |
|
32 | + if (!is_object($doctype)) { |
|
33 | + $doctype = new HTMLPurifier_Doctype( |
|
34 | + $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system |
|
35 | + ); |
|
36 | + } |
|
37 | + $this->doctypes[$doctype->name] = $doctype; |
|
38 | + $name = $doctype->name; |
|
39 | + // hookup aliases |
|
40 | + foreach ($doctype->aliases as $alias) { |
|
41 | + if (isset($this->doctypes[$alias])) continue; |
|
42 | + $this->aliases[$alias] = $name; |
|
43 | + } |
|
44 | + // remove old aliases |
|
45 | + if (isset($this->aliases[$name])) unset($this->aliases[$name]); |
|
46 | + return $doctype; |
|
47 | + } |
|
48 | 48 | |
49 | - /** |
|
50 | - * Retrieves reference to a doctype of a certain name |
|
51 | - * @note This function resolves aliases |
|
52 | - * @note When possible, use the more fully-featured make() |
|
53 | - * @param $doctype Name of doctype |
|
54 | - * @return Editable doctype object |
|
55 | - */ |
|
56 | - public function get($doctype) { |
|
57 | - if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
|
58 | - if (!isset($this->doctypes[$doctype])) { |
|
59 | - trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
|
60 | - $anon = new HTMLPurifier_Doctype($doctype); |
|
61 | - return $anon; |
|
62 | - } |
|
63 | - return $this->doctypes[$doctype]; |
|
64 | - } |
|
49 | + /** |
|
50 | + * Retrieves reference to a doctype of a certain name |
|
51 | + * @note This function resolves aliases |
|
52 | + * @note When possible, use the more fully-featured make() |
|
53 | + * @param $doctype Name of doctype |
|
54 | + * @return Editable doctype object |
|
55 | + */ |
|
56 | + public function get($doctype) { |
|
57 | + if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
|
58 | + if (!isset($this->doctypes[$doctype])) { |
|
59 | + trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
|
60 | + $anon = new HTMLPurifier_Doctype($doctype); |
|
61 | + return $anon; |
|
62 | + } |
|
63 | + return $this->doctypes[$doctype]; |
|
64 | + } |
|
65 | 65 | |
66 | - /** |
|
67 | - * Creates a doctype based on a configuration object, |
|
68 | - * will perform initialization on the doctype |
|
69 | - * @note Use this function to get a copy of doctype that config |
|
70 | - * can hold on to (this is necessary in order to tell |
|
71 | - * Generator whether or not the current document is XML |
|
72 | - * based or not). |
|
73 | - */ |
|
74 | - public function make($config) { |
|
75 | - return clone $this->get($this->getDoctypeFromConfig($config)); |
|
76 | - } |
|
66 | + /** |
|
67 | + * Creates a doctype based on a configuration object, |
|
68 | + * will perform initialization on the doctype |
|
69 | + * @note Use this function to get a copy of doctype that config |
|
70 | + * can hold on to (this is necessary in order to tell |
|
71 | + * Generator whether or not the current document is XML |
|
72 | + * based or not). |
|
73 | + */ |
|
74 | + public function make($config) { |
|
75 | + return clone $this->get($this->getDoctypeFromConfig($config)); |
|
76 | + } |
|
77 | 77 | |
78 | - /** |
|
79 | - * Retrieves the doctype from the configuration object |
|
80 | - */ |
|
81 | - public function getDoctypeFromConfig($config) { |
|
82 | - // recommended test |
|
83 | - $doctype = $config->get('HTML.Doctype'); |
|
84 | - if (!empty($doctype)) return $doctype; |
|
85 | - $doctype = $config->get('HTML.CustomDoctype'); |
|
86 | - if (!empty($doctype)) return $doctype; |
|
87 | - // backwards-compatibility |
|
88 | - if ($config->get('HTML.XHTML')) { |
|
89 | - $doctype = 'XHTML 1.0'; |
|
90 | - } else { |
|
91 | - $doctype = 'HTML 4.01'; |
|
92 | - } |
|
93 | - if ($config->get('HTML.Strict')) { |
|
94 | - $doctype .= ' Strict'; |
|
95 | - } else { |
|
96 | - $doctype .= ' Transitional'; |
|
97 | - } |
|
98 | - return $doctype; |
|
99 | - } |
|
78 | + /** |
|
79 | + * Retrieves the doctype from the configuration object |
|
80 | + */ |
|
81 | + public function getDoctypeFromConfig($config) { |
|
82 | + // recommended test |
|
83 | + $doctype = $config->get('HTML.Doctype'); |
|
84 | + if (!empty($doctype)) return $doctype; |
|
85 | + $doctype = $config->get('HTML.CustomDoctype'); |
|
86 | + if (!empty($doctype)) return $doctype; |
|
87 | + // backwards-compatibility |
|
88 | + if ($config->get('HTML.XHTML')) { |
|
89 | + $doctype = 'XHTML 1.0'; |
|
90 | + } else { |
|
91 | + $doctype = 'HTML 4.01'; |
|
92 | + } |
|
93 | + if ($config->get('HTML.Strict')) { |
|
94 | + $doctype .= ' Strict'; |
|
95 | + } else { |
|
96 | + $doctype .= ' Transitional'; |
|
97 | + } |
|
98 | + return $doctype; |
|
99 | + } |
|
100 | 100 | |
101 | 101 | } |
102 | 102 |
@@ -26,9 +26,15 @@ discard block |
||
26 | 26 | public function register($doctype, $xml = true, $modules = array(), |
27 | 27 | $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
28 | 28 | ) { |
29 | - if (!is_array($modules)) $modules = array($modules); |
|
30 | - if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules); |
|
31 | - if (!is_array($aliases)) $aliases = array($aliases); |
|
29 | + if (!is_array($modules)) { |
|
30 | + $modules = array($modules); |
|
31 | + } |
|
32 | + if (!is_array($tidy_modules)) { |
|
33 | + $tidy_modules = array($tidy_modules); |
|
34 | + } |
|
35 | + if (!is_array($aliases)) { |
|
36 | + $aliases = array($aliases); |
|
37 | + } |
|
32 | 38 | if (!is_object($doctype)) { |
33 | 39 | $doctype = new HTMLPurifier_Doctype( |
34 | 40 | $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system |
@@ -38,11 +44,15 @@ discard block |
||
38 | 44 | $name = $doctype->name; |
39 | 45 | // hookup aliases |
40 | 46 | foreach ($doctype->aliases as $alias) { |
41 | - if (isset($this->doctypes[$alias])) continue; |
|
47 | + if (isset($this->doctypes[$alias])) { |
|
48 | + continue; |
|
49 | + } |
|
42 | 50 | $this->aliases[$alias] = $name; |
43 | 51 | } |
44 | 52 | // remove old aliases |
45 | - if (isset($this->aliases[$name])) unset($this->aliases[$name]); |
|
53 | + if (isset($this->aliases[$name])) { |
|
54 | + unset($this->aliases[$name]); |
|
55 | + } |
|
46 | 56 | return $doctype; |
47 | 57 | } |
48 | 58 | |
@@ -54,7 +64,9 @@ discard block |
||
54 | 64 | * @return Editable doctype object |
55 | 65 | */ |
56 | 66 | public function get($doctype) { |
57 | - if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
|
67 | + if (isset($this->aliases[$doctype])) { |
|
68 | + $doctype = $this->aliases[$doctype]; |
|
69 | + } |
|
58 | 70 | if (!isset($this->doctypes[$doctype])) { |
59 | 71 | trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
60 | 72 | $anon = new HTMLPurifier_Doctype($doctype); |
@@ -81,9 +93,13 @@ discard block |
||
81 | 93 | public function getDoctypeFromConfig($config) { |
82 | 94 | // recommended test |
83 | 95 | $doctype = $config->get('HTML.Doctype'); |
84 | - if (!empty($doctype)) return $doctype; |
|
96 | + if (!empty($doctype)) { |
|
97 | + return $doctype; |
|
98 | + } |
|
85 | 99 | $doctype = $config->get('HTML.CustomDoctype'); |
86 | - if (!empty($doctype)) return $doctype; |
|
100 | + if (!empty($doctype)) { |
|
101 | + return $doctype; |
|
102 | + } |
|
87 | 103 | // backwards-compatibility |
88 | 104 | if ($config->get('HTML.XHTML')) { |
89 | 105 | $doctype = 'XHTML 1.0'; |
@@ -56,7 +56,7 @@ |
||
56 | 56 | public function get($doctype) { |
57 | 57 | if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
58 | 58 | if (!isset($this->doctypes[$doctype])) { |
59 | - trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
|
59 | + trigger_error('Doctype '.htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false).' does not exist', E_USER_ERROR); |
|
60 | 60 | $anon = new HTMLPurifier_Doctype($doctype); |
61 | 61 | return $anon; |
62 | 62 | } |
@@ -31,6 +31,9 @@ discard block |
||
31 | 31 | |
32 | 32 | /** |
33 | 33 | * iconv wrapper which mutes errors and works around bugs. |
34 | + * @param string $in |
|
35 | + * @param string $out |
|
36 | + * @param string $text |
|
34 | 37 | */ |
35 | 38 | public static function iconv($in, $out, $text, $max_chunk_size = 8000) { |
36 | 39 | $code = self::testIconvTruncateBug(); |
@@ -332,6 +335,7 @@ discard block |
||
332 | 335 | |
333 | 336 | /** |
334 | 337 | * Converts a string to UTF-8 based on configuration. |
338 | + * @param HTMLPurifier_Context $context |
|
335 | 339 | */ |
336 | 340 | public static function convertToUTF8($str, $config, $context) { |
337 | 341 | $encoding = $config->get('Core.Encoding'); |
@@ -362,6 +366,7 @@ discard block |
||
362 | 366 | * Converts a string from UTF-8 based on configuration. |
363 | 367 | * @note Currently, this is a lossy conversion, with unexpressable |
364 | 368 | * characters being omitted. |
369 | + * @param HTMLPurifier_Context $context |
|
365 | 370 | */ |
366 | 371 | public static function convertFromUTF8($str, $config, $context) { |
367 | 372 | $encoding = $config->get('Core.Encoding'); |
@@ -7,532 +7,532 @@ |
||
7 | 7 | class HTMLPurifier_Encoder |
8 | 8 | { |
9 | 9 | |
10 | - /** |
|
11 | - * Constructor throws fatal error if you attempt to instantiate class |
|
12 | - */ |
|
13 | - private function __construct() { |
|
14 | - trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); |
|
15 | - } |
|
16 | - |
|
17 | - /** |
|
18 | - * Error-handler that mutes errors, alternative to shut-up operator. |
|
19 | - */ |
|
20 | - public static function muteErrorHandler() {} |
|
21 | - |
|
22 | - /** |
|
23 | - * iconv wrapper which mutes errors, but doesn't work around bugs. |
|
24 | - */ |
|
25 | - public static function unsafeIconv($in, $out, $text) { |
|
26 | - set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); |
|
27 | - $r = iconv($in, $out, $text); |
|
28 | - restore_error_handler(); |
|
29 | - return $r; |
|
30 | - } |
|
31 | - |
|
32 | - /** |
|
33 | - * iconv wrapper which mutes errors and works around bugs. |
|
34 | - */ |
|
35 | - public static function iconv($in, $out, $text, $max_chunk_size = 8000) { |
|
36 | - $code = self::testIconvTruncateBug(); |
|
37 | - if ($code == self::ICONV_OK) { |
|
38 | - return self::unsafeIconv($in, $out, $text); |
|
39 | - } elseif ($code == self::ICONV_TRUNCATES) { |
|
40 | - // we can only work around this if the input character set |
|
41 | - // is utf-8 |
|
42 | - if ($in == 'utf-8') { |
|
43 | - if ($max_chunk_size < 4) { |
|
44 | - trigger_error('max_chunk_size is too small', E_USER_WARNING); |
|
45 | - return false; |
|
46 | - } |
|
47 | - // split into 8000 byte chunks, but be careful to handle |
|
48 | - // multibyte boundaries properly |
|
49 | - if (($c = strlen($text)) <= $max_chunk_size) { |
|
50 | - return self::unsafeIconv($in, $out, $text); |
|
51 | - } |
|
52 | - $r = ''; |
|
53 | - $i = 0; |
|
54 | - while (true) { |
|
55 | - if ($i + $max_chunk_size >= $c) { |
|
56 | - $r .= self::unsafeIconv($in, $out, substr($text, $i)); |
|
57 | - break; |
|
58 | - } |
|
59 | - // wibble the boundary |
|
60 | - if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) { |
|
61 | - $chunk_size = $max_chunk_size; |
|
62 | - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) { |
|
63 | - $chunk_size = $max_chunk_size - 1; |
|
64 | - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) { |
|
65 | - $chunk_size = $max_chunk_size - 2; |
|
66 | - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) { |
|
67 | - $chunk_size = $max_chunk_size - 3; |
|
68 | - } else { |
|
69 | - return false; // rather confusing UTF-8... |
|
70 | - } |
|
71 | - $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths |
|
72 | - $r .= self::unsafeIconv($in, $out, $chunk); |
|
73 | - $i += $chunk_size; |
|
74 | - } |
|
75 | - return $r; |
|
76 | - } else { |
|
77 | - return false; |
|
78 | - } |
|
79 | - } else { |
|
80 | - return false; |
|
81 | - } |
|
82 | - } |
|
83 | - |
|
84 | - /** |
|
85 | - * Cleans a UTF-8 string for well-formedness and SGML validity |
|
86 | - * |
|
87 | - * It will parse according to UTF-8 and return a valid UTF8 string, with |
|
88 | - * non-SGML codepoints excluded. |
|
89 | - * |
|
90 | - * @note Just for reference, the non-SGML code points are 0 to 31 and |
|
91 | - * 127 to 159, inclusive. However, we allow code points 9, 10 |
|
92 | - * and 13, which are the tab, line feed and carriage return |
|
93 | - * respectively. 128 and above the code points map to multibyte |
|
94 | - * UTF-8 representations. |
|
95 | - * |
|
96 | - * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and |
|
97 | - * [email protected] at <http://iki.fi/hsivonen/php-utf8/> under the |
|
98 | - * LGPL license. Notes on what changed are inside, but in general, |
|
99 | - * the original code transformed UTF-8 text into an array of integer |
|
100 | - * Unicode codepoints. Understandably, transforming that back to |
|
101 | - * a string would be somewhat expensive, so the function was modded to |
|
102 | - * directly operate on the string. However, this discourages code |
|
103 | - * reuse, and the logic enumerated here would be useful for any |
|
104 | - * function that needs to be able to understand UTF-8 characters. |
|
105 | - * As of right now, only smart lossless character encoding converters |
|
106 | - * would need that, and I'm probably not going to implement them. |
|
107 | - * Once again, PHP 6 should solve all our problems. |
|
108 | - */ |
|
109 | - public static function cleanUTF8($str, $force_php = false) { |
|
110 | - |
|
111 | - // UTF-8 validity is checked since PHP 4.3.5 |
|
112 | - // This is an optimization: if the string is already valid UTF-8, no |
|
113 | - // need to do PHP stuff. 99% of the time, this will be the case. |
|
114 | - // The regexp matches the XML char production, as well as well as excluding |
|
115 | - // non-SGML codepoints U+007F to U+009F |
|
116 | - if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) { |
|
117 | - return $str; |
|
118 | - } |
|
119 | - |
|
120 | - $mState = 0; // cached expected number of octets after the current octet |
|
121 | - // until the beginning of the next UTF8 character sequence |
|
122 | - $mUcs4 = 0; // cached Unicode character |
|
123 | - $mBytes = 1; // cached expected number of octets in the current sequence |
|
124 | - |
|
125 | - // original code involved an $out that was an array of Unicode |
|
126 | - // codepoints. Instead of having to convert back into UTF-8, we've |
|
127 | - // decided to directly append valid UTF-8 characters onto a string |
|
128 | - // $out once they're done. $char accumulates raw bytes, while $mUcs4 |
|
129 | - // turns into the Unicode code point, so there's some redundancy. |
|
130 | - |
|
131 | - $out = ''; |
|
132 | - $char = ''; |
|
133 | - |
|
134 | - $len = strlen($str); |
|
135 | - for($i = 0; $i < $len; $i++) { |
|
136 | - $in = ord($str{$i}); |
|
137 | - $char .= $str[$i]; // append byte to char |
|
138 | - if (0 == $mState) { |
|
139 | - // When mState is zero we expect either a US-ASCII character |
|
140 | - // or a multi-octet sequence. |
|
141 | - if (0 == (0x80 & ($in))) { |
|
142 | - // US-ASCII, pass straight through. |
|
143 | - if (($in <= 31 || $in == 127) && |
|
144 | - !($in == 9 || $in == 13 || $in == 10) // save \r\t\n |
|
145 | - ) { |
|
146 | - // control characters, remove |
|
147 | - } else { |
|
148 | - $out .= $char; |
|
149 | - } |
|
150 | - // reset |
|
151 | - $char = ''; |
|
152 | - $mBytes = 1; |
|
153 | - } elseif (0xC0 == (0xE0 & ($in))) { |
|
154 | - // First octet of 2 octet sequence |
|
155 | - $mUcs4 = ($in); |
|
156 | - $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
157 | - $mState = 1; |
|
158 | - $mBytes = 2; |
|
159 | - } elseif (0xE0 == (0xF0 & ($in))) { |
|
160 | - // First octet of 3 octet sequence |
|
161 | - $mUcs4 = ($in); |
|
162 | - $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
163 | - $mState = 2; |
|
164 | - $mBytes = 3; |
|
165 | - } elseif (0xF0 == (0xF8 & ($in))) { |
|
166 | - // First octet of 4 octet sequence |
|
167 | - $mUcs4 = ($in); |
|
168 | - $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
169 | - $mState = 3; |
|
170 | - $mBytes = 4; |
|
171 | - } elseif (0xF8 == (0xFC & ($in))) { |
|
172 | - // First octet of 5 octet sequence. |
|
173 | - // |
|
174 | - // This is illegal because the encoded codepoint must be |
|
175 | - // either: |
|
176 | - // (a) not the shortest form or |
|
177 | - // (b) outside the Unicode range of 0-0x10FFFF. |
|
178 | - // Rather than trying to resynchronize, we will carry on |
|
179 | - // until the end of the sequence and let the later error |
|
180 | - // handling code catch it. |
|
181 | - $mUcs4 = ($in); |
|
182 | - $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
183 | - $mState = 4; |
|
184 | - $mBytes = 5; |
|
185 | - } elseif (0xFC == (0xFE & ($in))) { |
|
186 | - // First octet of 6 octet sequence, see comments for 5 |
|
187 | - // octet sequence. |
|
188 | - $mUcs4 = ($in); |
|
189 | - $mUcs4 = ($mUcs4 & 1) << 30; |
|
190 | - $mState = 5; |
|
191 | - $mBytes = 6; |
|
192 | - } else { |
|
193 | - // Current octet is neither in the US-ASCII range nor a |
|
194 | - // legal first octet of a multi-octet sequence. |
|
195 | - $mState = 0; |
|
196 | - $mUcs4 = 0; |
|
197 | - $mBytes = 1; |
|
198 | - $char = ''; |
|
199 | - } |
|
200 | - } else { |
|
201 | - // When mState is non-zero, we expect a continuation of the |
|
202 | - // multi-octet sequence |
|
203 | - if (0x80 == (0xC0 & ($in))) { |
|
204 | - // Legal continuation. |
|
205 | - $shift = ($mState - 1) * 6; |
|
206 | - $tmp = $in; |
|
207 | - $tmp = ($tmp & 0x0000003F) << $shift; |
|
208 | - $mUcs4 |= $tmp; |
|
209 | - |
|
210 | - if (0 == --$mState) { |
|
211 | - // End of the multi-octet sequence. mUcs4 now contains |
|
212 | - // the final Unicode codepoint to be output |
|
213 | - |
|
214 | - // Check for illegal sequences and codepoints. |
|
215 | - |
|
216 | - // From Unicode 3.1, non-shortest form is illegal |
|
217 | - if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || |
|
218 | - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || |
|
219 | - ((4 == $mBytes) && ($mUcs4 < 0x10000)) || |
|
220 | - (4 < $mBytes) || |
|
221 | - // From Unicode 3.2, surrogate characters = illegal |
|
222 | - (($mUcs4 & 0xFFFFF800) == 0xD800) || |
|
223 | - // Codepoints outside the Unicode range are illegal |
|
224 | - ($mUcs4 > 0x10FFFF) |
|
225 | - ) { |
|
226 | - |
|
227 | - } elseif (0xFEFF != $mUcs4 && // omit BOM |
|
228 | - // check for valid Char unicode codepoints |
|
229 | - ( |
|
230 | - 0x9 == $mUcs4 || |
|
231 | - 0xA == $mUcs4 || |
|
232 | - 0xD == $mUcs4 || |
|
233 | - (0x20 <= $mUcs4 && 0x7E >= $mUcs4) || |
|
234 | - // 7F-9F is not strictly prohibited by XML, |
|
235 | - // but it is non-SGML, and thus we don't allow it |
|
236 | - (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || |
|
237 | - (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) |
|
238 | - ) |
|
239 | - ) { |
|
240 | - $out .= $char; |
|
241 | - } |
|
242 | - // initialize UTF8 cache (reset) |
|
243 | - $mState = 0; |
|
244 | - $mUcs4 = 0; |
|
245 | - $mBytes = 1; |
|
246 | - $char = ''; |
|
247 | - } |
|
248 | - } else { |
|
249 | - // ((0xC0 & (*in) != 0x80) && (mState != 0)) |
|
250 | - // Incomplete multi-octet sequence. |
|
251 | - // used to result in complete fail, but we'll reset |
|
252 | - $mState = 0; |
|
253 | - $mUcs4 = 0; |
|
254 | - $mBytes = 1; |
|
255 | - $char =''; |
|
256 | - } |
|
257 | - } |
|
258 | - } |
|
259 | - return $out; |
|
260 | - } |
|
261 | - |
|
262 | - /** |
|
263 | - * Translates a Unicode codepoint into its corresponding UTF-8 character. |
|
264 | - * @note Based on Feyd's function at |
|
265 | - * <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>, |
|
266 | - * which is in public domain. |
|
267 | - * @note While we're going to do code point parsing anyway, a good |
|
268 | - * optimization would be to refuse to translate code points that |
|
269 | - * are non-SGML characters. However, this could lead to duplication. |
|
270 | - * @note This is very similar to the unichr function in |
|
271 | - * maintenance/generate-entity-file.php (although this is superior, |
|
272 | - * due to its sanity checks). |
|
273 | - */ |
|
274 | - |
|
275 | - // +----------+----------+----------+----------+ |
|
276 | - // | 33222222 | 22221111 | 111111 | | |
|
277 | - // | 10987654 | 32109876 | 54321098 | 76543210 | bit |
|
278 | - // +----------+----------+----------+----------+ |
|
279 | - // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F |
|
280 | - // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF |
|
281 | - // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF |
|
282 | - // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF |
|
283 | - // +----------+----------+----------+----------+ |
|
284 | - // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) |
|
285 | - // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes |
|
286 | - // +----------+----------+----------+----------+ |
|
287 | - |
|
288 | - public static function unichr($code) { |
|
289 | - if($code > 1114111 or $code < 0 or |
|
290 | - ($code >= 55296 and $code <= 57343) ) { |
|
291 | - // bits are set outside the "valid" range as defined |
|
292 | - // by UNICODE 4.1.0 |
|
293 | - return ''; |
|
294 | - } |
|
295 | - |
|
296 | - $x = $y = $z = $w = 0; |
|
297 | - if ($code < 128) { |
|
298 | - // regular ASCII character |
|
299 | - $x = $code; |
|
300 | - } else { |
|
301 | - // set up bits for UTF-8 |
|
302 | - $x = ($code & 63) | 128; |
|
303 | - if ($code < 2048) { |
|
304 | - $y = (($code & 2047) >> 6) | 192; |
|
305 | - } else { |
|
306 | - $y = (($code & 4032) >> 6) | 128; |
|
307 | - if($code < 65536) { |
|
308 | - $z = (($code >> 12) & 15) | 224; |
|
309 | - } else { |
|
310 | - $z = (($code >> 12) & 63) | 128; |
|
311 | - $w = (($code >> 18) & 7) | 240; |
|
312 | - } |
|
313 | - } |
|
314 | - } |
|
315 | - // set up the actual character |
|
316 | - $ret = ''; |
|
317 | - if($w) $ret .= chr($w); |
|
318 | - if($z) $ret .= chr($z); |
|
319 | - if($y) $ret .= chr($y); |
|
320 | - $ret .= chr($x); |
|
321 | - |
|
322 | - return $ret; |
|
323 | - } |
|
324 | - |
|
325 | - public static function iconvAvailable() { |
|
326 | - static $iconv = null; |
|
327 | - if ($iconv === null) { |
|
328 | - $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE; |
|
329 | - } |
|
330 | - return $iconv; |
|
331 | - } |
|
332 | - |
|
333 | - /** |
|
334 | - * Converts a string to UTF-8 based on configuration. |
|
335 | - */ |
|
336 | - public static function convertToUTF8($str, $config, $context) { |
|
337 | - $encoding = $config->get('Core.Encoding'); |
|
338 | - if ($encoding === 'utf-8') return $str; |
|
339 | - static $iconv = null; |
|
340 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
341 | - if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
342 | - // unaffected by bugs, since UTF-8 support all characters |
|
343 | - $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
|
344 | - if ($str === false) { |
|
345 | - // $encoding is not a valid encoding |
|
346 | - trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); |
|
347 | - return ''; |
|
348 | - } |
|
349 | - // If the string is bjorked by Shift_JIS or a similar encoding |
|
350 | - // that doesn't support all of ASCII, convert the naughty |
|
351 | - // characters to their true byte-wise ASCII/UTF-8 equivalents. |
|
352 | - $str = strtr($str, self::testEncodingSupportsASCII($encoding)); |
|
353 | - return $str; |
|
354 | - } elseif ($encoding === 'iso-8859-1') { |
|
355 | - $str = utf8_encode($str); |
|
356 | - return $str; |
|
357 | - } |
|
358 | - trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); |
|
359 | - } |
|
360 | - |
|
361 | - /** |
|
362 | - * Converts a string from UTF-8 based on configuration. |
|
363 | - * @note Currently, this is a lossy conversion, with unexpressable |
|
364 | - * characters being omitted. |
|
365 | - */ |
|
366 | - public static function convertFromUTF8($str, $config, $context) { |
|
367 | - $encoding = $config->get('Core.Encoding'); |
|
368 | - if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { |
|
369 | - $str = self::convertToASCIIDumbLossless($str); |
|
370 | - } |
|
371 | - if ($encoding === 'utf-8') return $str; |
|
372 | - static $iconv = null; |
|
373 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
374 | - if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
375 | - // Undo our previous fix in convertToUTF8, otherwise iconv will barf |
|
376 | - $ascii_fix = self::testEncodingSupportsASCII($encoding); |
|
377 | - if (!$escape && !empty($ascii_fix)) { |
|
378 | - $clear_fix = array(); |
|
379 | - foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; |
|
380 | - $str = strtr($str, $clear_fix); |
|
381 | - } |
|
382 | - $str = strtr($str, array_flip($ascii_fix)); |
|
383 | - // Normal stuff |
|
384 | - $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); |
|
385 | - return $str; |
|
386 | - } elseif ($encoding === 'iso-8859-1') { |
|
387 | - $str = utf8_decode($str); |
|
388 | - return $str; |
|
389 | - } |
|
390 | - trigger_error('Encoding not supported', E_USER_ERROR); |
|
391 | - // You might be tempted to assume that the ASCII representation |
|
392 | - // might be OK, however, this is *not* universally true over all |
|
393 | - // encodings. So we take the conservative route here, rather |
|
394 | - // than forcibly turn on %Core.EscapeNonASCIICharacters |
|
395 | - } |
|
396 | - |
|
397 | - /** |
|
398 | - * Lossless (character-wise) conversion of HTML to ASCII |
|
399 | - * @param $str UTF-8 string to be converted to ASCII |
|
400 | - * @returns ASCII encoded string with non-ASCII character entity-ized |
|
401 | - * @warning Adapted from MediaWiki, claiming fair use: this is a common |
|
402 | - * algorithm. If you disagree with this license fudgery, |
|
403 | - * implement it yourself. |
|
404 | - * @note Uses decimal numeric entities since they are best supported. |
|
405 | - * @note This is a DUMB function: it has no concept of keeping |
|
406 | - * character entities that the projected character encoding |
|
407 | - * can allow. We could possibly implement a smart version |
|
408 | - * but that would require it to also know which Unicode |
|
409 | - * codepoints the charset supported (not an easy task). |
|
410 | - * @note Sort of with cleanUTF8() but it assumes that $str is |
|
411 | - * well-formed UTF-8 |
|
412 | - */ |
|
413 | - public static function convertToASCIIDumbLossless($str) { |
|
414 | - $bytesleft = 0; |
|
415 | - $result = ''; |
|
416 | - $working = 0; |
|
417 | - $len = strlen($str); |
|
418 | - for( $i = 0; $i < $len; $i++ ) { |
|
419 | - $bytevalue = ord( $str[$i] ); |
|
420 | - if( $bytevalue <= 0x7F ) { //0xxx xxxx |
|
421 | - $result .= chr( $bytevalue ); |
|
422 | - $bytesleft = 0; |
|
423 | - } elseif( $bytevalue <= 0xBF ) { //10xx xxxx |
|
424 | - $working = $working << 6; |
|
425 | - $working += ($bytevalue & 0x3F); |
|
426 | - $bytesleft--; |
|
427 | - if( $bytesleft <= 0 ) { |
|
428 | - $result .= "&#" . $working . ";"; |
|
429 | - } |
|
430 | - } elseif( $bytevalue <= 0xDF ) { //110x xxxx |
|
431 | - $working = $bytevalue & 0x1F; |
|
432 | - $bytesleft = 1; |
|
433 | - } elseif( $bytevalue <= 0xEF ) { //1110 xxxx |
|
434 | - $working = $bytevalue & 0x0F; |
|
435 | - $bytesleft = 2; |
|
436 | - } else { //1111 0xxx |
|
437 | - $working = $bytevalue & 0x07; |
|
438 | - $bytesleft = 3; |
|
439 | - } |
|
440 | - } |
|
441 | - return $result; |
|
442 | - } |
|
443 | - |
|
444 | - /** No bugs detected in iconv. */ |
|
445 | - const ICONV_OK = 0; |
|
446 | - |
|
447 | - /** Iconv truncates output if converting from UTF-8 to another |
|
448 | - * character set with //IGNORE, and a non-encodable character is found */ |
|
449 | - const ICONV_TRUNCATES = 1; |
|
450 | - |
|
451 | - /** Iconv does not support //IGNORE, making it unusable for |
|
452 | - * transcoding purposes */ |
|
453 | - const ICONV_UNUSABLE = 2; |
|
454 | - |
|
455 | - /** |
|
456 | - * glibc iconv has a known bug where it doesn't handle the magic |
|
457 | - * //IGNORE stanza correctly. In particular, rather than ignore |
|
458 | - * characters, it will return an EILSEQ after consuming some number |
|
459 | - * of characters, and expect you to restart iconv as if it were |
|
460 | - * an E2BIG. Old versions of PHP did not respect the errno, and |
|
461 | - * returned the fragment, so as a result you would see iconv |
|
462 | - * mysteriously truncating output. We can work around this by |
|
463 | - * manually chopping our input into segments of about 8000 |
|
464 | - * characters, as long as PHP ignores the error code. If PHP starts |
|
465 | - * paying attention to the error code, iconv becomes unusable. |
|
466 | - * |
|
467 | - * @returns Error code indicating severity of bug. |
|
468 | - */ |
|
469 | - public static function testIconvTruncateBug() { |
|
470 | - static $code = null; |
|
471 | - if ($code === null) { |
|
472 | - // better not use iconv, otherwise infinite loop! |
|
473 | - $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); |
|
474 | - if ($r === false) { |
|
475 | - $code = self::ICONV_UNUSABLE; |
|
476 | - } elseif (($c = strlen($r)) < 9000) { |
|
477 | - $code = self::ICONV_TRUNCATES; |
|
478 | - } elseif ($c > 9000) { |
|
479 | - trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR); |
|
480 | - } else { |
|
481 | - $code = self::ICONV_OK; |
|
482 | - } |
|
483 | - } |
|
484 | - return $code; |
|
485 | - } |
|
486 | - |
|
487 | - /** |
|
488 | - * This expensive function tests whether or not a given character |
|
489 | - * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will |
|
490 | - * fail this test, and require special processing. Variable width |
|
491 | - * encodings shouldn't ever fail. |
|
492 | - * |
|
493 | - * @param string $encoding Encoding name to test, as per iconv format |
|
494 | - * @param bool $bypass Whether or not to bypass the precompiled arrays. |
|
495 | - * @return Array of UTF-8 characters to their corresponding ASCII, |
|
496 | - * which can be used to "undo" any overzealous iconv action. |
|
497 | - */ |
|
498 | - public static function testEncodingSupportsASCII($encoding, $bypass = false) { |
|
499 | - // All calls to iconv here are unsafe, proof by case analysis: |
|
500 | - // If ICONV_OK, no difference. |
|
501 | - // If ICONV_TRUNCATE, all calls involve one character inputs, |
|
502 | - // so bug is not triggered. |
|
503 | - // If ICONV_UNUSABLE, this call is irrelevant |
|
504 | - static $encodings = array(); |
|
505 | - if (!$bypass) { |
|
506 | - if (isset($encodings[$encoding])) return $encodings[$encoding]; |
|
507 | - $lenc = strtolower($encoding); |
|
508 | - switch ($lenc) { |
|
509 | - case 'shift_jis': |
|
510 | - return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'); |
|
511 | - case 'johab': |
|
512 | - return array("\xE2\x82\xA9" => '\\'); |
|
513 | - } |
|
514 | - if (strpos($lenc, 'iso-8859-') === 0) return array(); |
|
515 | - } |
|
516 | - $ret = array(); |
|
517 | - if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; |
|
518 | - for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars |
|
519 | - $c = chr($i); // UTF-8 char |
|
520 | - $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion |
|
521 | - if ( |
|
522 | - $r === '' || |
|
523 | - // This line is needed for iconv implementations that do not |
|
524 | - // omit characters that do not exist in the target character set |
|
525 | - ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c) |
|
526 | - ) { |
|
527 | - // Reverse engineer: what's the UTF-8 equiv of this byte |
|
528 | - // sequence? This assumes that there's no variable width |
|
529 | - // encoding that doesn't support ASCII. |
|
530 | - $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c; |
|
531 | - } |
|
532 | - } |
|
533 | - $encodings[$encoding] = $ret; |
|
534 | - return $ret; |
|
535 | - } |
|
10 | + /** |
|
11 | + * Constructor throws fatal error if you attempt to instantiate class |
|
12 | + */ |
|
13 | + private function __construct() { |
|
14 | + trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); |
|
15 | + } |
|
16 | + |
|
17 | + /** |
|
18 | + * Error-handler that mutes errors, alternative to shut-up operator. |
|
19 | + */ |
|
20 | + public static function muteErrorHandler() {} |
|
21 | + |
|
22 | + /** |
|
23 | + * iconv wrapper which mutes errors, but doesn't work around bugs. |
|
24 | + */ |
|
25 | + public static function unsafeIconv($in, $out, $text) { |
|
26 | + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); |
|
27 | + $r = iconv($in, $out, $text); |
|
28 | + restore_error_handler(); |
|
29 | + return $r; |
|
30 | + } |
|
31 | + |
|
32 | + /** |
|
33 | + * iconv wrapper which mutes errors and works around bugs. |
|
34 | + */ |
|
35 | + public static function iconv($in, $out, $text, $max_chunk_size = 8000) { |
|
36 | + $code = self::testIconvTruncateBug(); |
|
37 | + if ($code == self::ICONV_OK) { |
|
38 | + return self::unsafeIconv($in, $out, $text); |
|
39 | + } elseif ($code == self::ICONV_TRUNCATES) { |
|
40 | + // we can only work around this if the input character set |
|
41 | + // is utf-8 |
|
42 | + if ($in == 'utf-8') { |
|
43 | + if ($max_chunk_size < 4) { |
|
44 | + trigger_error('max_chunk_size is too small', E_USER_WARNING); |
|
45 | + return false; |
|
46 | + } |
|
47 | + // split into 8000 byte chunks, but be careful to handle |
|
48 | + // multibyte boundaries properly |
|
49 | + if (($c = strlen($text)) <= $max_chunk_size) { |
|
50 | + return self::unsafeIconv($in, $out, $text); |
|
51 | + } |
|
52 | + $r = ''; |
|
53 | + $i = 0; |
|
54 | + while (true) { |
|
55 | + if ($i + $max_chunk_size >= $c) { |
|
56 | + $r .= self::unsafeIconv($in, $out, substr($text, $i)); |
|
57 | + break; |
|
58 | + } |
|
59 | + // wibble the boundary |
|
60 | + if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) { |
|
61 | + $chunk_size = $max_chunk_size; |
|
62 | + } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) { |
|
63 | + $chunk_size = $max_chunk_size - 1; |
|
64 | + } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) { |
|
65 | + $chunk_size = $max_chunk_size - 2; |
|
66 | + } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) { |
|
67 | + $chunk_size = $max_chunk_size - 3; |
|
68 | + } else { |
|
69 | + return false; // rather confusing UTF-8... |
|
70 | + } |
|
71 | + $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths |
|
72 | + $r .= self::unsafeIconv($in, $out, $chunk); |
|
73 | + $i += $chunk_size; |
|
74 | + } |
|
75 | + return $r; |
|
76 | + } else { |
|
77 | + return false; |
|
78 | + } |
|
79 | + } else { |
|
80 | + return false; |
|
81 | + } |
|
82 | + } |
|
83 | + |
|
84 | + /** |
|
85 | + * Cleans a UTF-8 string for well-formedness and SGML validity |
|
86 | + * |
|
87 | + * It will parse according to UTF-8 and return a valid UTF8 string, with |
|
88 | + * non-SGML codepoints excluded. |
|
89 | + * |
|
90 | + * @note Just for reference, the non-SGML code points are 0 to 31 and |
|
91 | + * 127 to 159, inclusive. However, we allow code points 9, 10 |
|
92 | + * and 13, which are the tab, line feed and carriage return |
|
93 | + * respectively. 128 and above the code points map to multibyte |
|
94 | + * UTF-8 representations. |
|
95 | + * |
|
96 | + * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and |
|
97 | + * [email protected] at <http://iki.fi/hsivonen/php-utf8/> under the |
|
98 | + * LGPL license. Notes on what changed are inside, but in general, |
|
99 | + * the original code transformed UTF-8 text into an array of integer |
|
100 | + * Unicode codepoints. Understandably, transforming that back to |
|
101 | + * a string would be somewhat expensive, so the function was modded to |
|
102 | + * directly operate on the string. However, this discourages code |
|
103 | + * reuse, and the logic enumerated here would be useful for any |
|
104 | + * function that needs to be able to understand UTF-8 characters. |
|
105 | + * As of right now, only smart lossless character encoding converters |
|
106 | + * would need that, and I'm probably not going to implement them. |
|
107 | + * Once again, PHP 6 should solve all our problems. |
|
108 | + */ |
|
109 | + public static function cleanUTF8($str, $force_php = false) { |
|
110 | + |
|
111 | + // UTF-8 validity is checked since PHP 4.3.5 |
|
112 | + // This is an optimization: if the string is already valid UTF-8, no |
|
113 | + // need to do PHP stuff. 99% of the time, this will be the case. |
|
114 | + // The regexp matches the XML char production, as well as well as excluding |
|
115 | + // non-SGML codepoints U+007F to U+009F |
|
116 | + if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) { |
|
117 | + return $str; |
|
118 | + } |
|
119 | + |
|
120 | + $mState = 0; // cached expected number of octets after the current octet |
|
121 | + // until the beginning of the next UTF8 character sequence |
|
122 | + $mUcs4 = 0; // cached Unicode character |
|
123 | + $mBytes = 1; // cached expected number of octets in the current sequence |
|
124 | + |
|
125 | + // original code involved an $out that was an array of Unicode |
|
126 | + // codepoints. Instead of having to convert back into UTF-8, we've |
|
127 | + // decided to directly append valid UTF-8 characters onto a string |
|
128 | + // $out once they're done. $char accumulates raw bytes, while $mUcs4 |
|
129 | + // turns into the Unicode code point, so there's some redundancy. |
|
130 | + |
|
131 | + $out = ''; |
|
132 | + $char = ''; |
|
133 | + |
|
134 | + $len = strlen($str); |
|
135 | + for($i = 0; $i < $len; $i++) { |
|
136 | + $in = ord($str{$i}); |
|
137 | + $char .= $str[$i]; // append byte to char |
|
138 | + if (0 == $mState) { |
|
139 | + // When mState is zero we expect either a US-ASCII character |
|
140 | + // or a multi-octet sequence. |
|
141 | + if (0 == (0x80 & ($in))) { |
|
142 | + // US-ASCII, pass straight through. |
|
143 | + if (($in <= 31 || $in == 127) && |
|
144 | + !($in == 9 || $in == 13 || $in == 10) // save \r\t\n |
|
145 | + ) { |
|
146 | + // control characters, remove |
|
147 | + } else { |
|
148 | + $out .= $char; |
|
149 | + } |
|
150 | + // reset |
|
151 | + $char = ''; |
|
152 | + $mBytes = 1; |
|
153 | + } elseif (0xC0 == (0xE0 & ($in))) { |
|
154 | + // First octet of 2 octet sequence |
|
155 | + $mUcs4 = ($in); |
|
156 | + $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
157 | + $mState = 1; |
|
158 | + $mBytes = 2; |
|
159 | + } elseif (0xE0 == (0xF0 & ($in))) { |
|
160 | + // First octet of 3 octet sequence |
|
161 | + $mUcs4 = ($in); |
|
162 | + $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
163 | + $mState = 2; |
|
164 | + $mBytes = 3; |
|
165 | + } elseif (0xF0 == (0xF8 & ($in))) { |
|
166 | + // First octet of 4 octet sequence |
|
167 | + $mUcs4 = ($in); |
|
168 | + $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
169 | + $mState = 3; |
|
170 | + $mBytes = 4; |
|
171 | + } elseif (0xF8 == (0xFC & ($in))) { |
|
172 | + // First octet of 5 octet sequence. |
|
173 | + // |
|
174 | + // This is illegal because the encoded codepoint must be |
|
175 | + // either: |
|
176 | + // (a) not the shortest form or |
|
177 | + // (b) outside the Unicode range of 0-0x10FFFF. |
|
178 | + // Rather than trying to resynchronize, we will carry on |
|
179 | + // until the end of the sequence and let the later error |
|
180 | + // handling code catch it. |
|
181 | + $mUcs4 = ($in); |
|
182 | + $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
183 | + $mState = 4; |
|
184 | + $mBytes = 5; |
|
185 | + } elseif (0xFC == (0xFE & ($in))) { |
|
186 | + // First octet of 6 octet sequence, see comments for 5 |
|
187 | + // octet sequence. |
|
188 | + $mUcs4 = ($in); |
|
189 | + $mUcs4 = ($mUcs4 & 1) << 30; |
|
190 | + $mState = 5; |
|
191 | + $mBytes = 6; |
|
192 | + } else { |
|
193 | + // Current octet is neither in the US-ASCII range nor a |
|
194 | + // legal first octet of a multi-octet sequence. |
|
195 | + $mState = 0; |
|
196 | + $mUcs4 = 0; |
|
197 | + $mBytes = 1; |
|
198 | + $char = ''; |
|
199 | + } |
|
200 | + } else { |
|
201 | + // When mState is non-zero, we expect a continuation of the |
|
202 | + // multi-octet sequence |
|
203 | + if (0x80 == (0xC0 & ($in))) { |
|
204 | + // Legal continuation. |
|
205 | + $shift = ($mState - 1) * 6; |
|
206 | + $tmp = $in; |
|
207 | + $tmp = ($tmp & 0x0000003F) << $shift; |
|
208 | + $mUcs4 |= $tmp; |
|
209 | + |
|
210 | + if (0 == --$mState) { |
|
211 | + // End of the multi-octet sequence. mUcs4 now contains |
|
212 | + // the final Unicode codepoint to be output |
|
213 | + |
|
214 | + // Check for illegal sequences and codepoints. |
|
215 | + |
|
216 | + // From Unicode 3.1, non-shortest form is illegal |
|
217 | + if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || |
|
218 | + ((3 == $mBytes) && ($mUcs4 < 0x0800)) || |
|
219 | + ((4 == $mBytes) && ($mUcs4 < 0x10000)) || |
|
220 | + (4 < $mBytes) || |
|
221 | + // From Unicode 3.2, surrogate characters = illegal |
|
222 | + (($mUcs4 & 0xFFFFF800) == 0xD800) || |
|
223 | + // Codepoints outside the Unicode range are illegal |
|
224 | + ($mUcs4 > 0x10FFFF) |
|
225 | + ) { |
|
226 | + |
|
227 | + } elseif (0xFEFF != $mUcs4 && // omit BOM |
|
228 | + // check for valid Char unicode codepoints |
|
229 | + ( |
|
230 | + 0x9 == $mUcs4 || |
|
231 | + 0xA == $mUcs4 || |
|
232 | + 0xD == $mUcs4 || |
|
233 | + (0x20 <= $mUcs4 && 0x7E >= $mUcs4) || |
|
234 | + // 7F-9F is not strictly prohibited by XML, |
|
235 | + // but it is non-SGML, and thus we don't allow it |
|
236 | + (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || |
|
237 | + (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) |
|
238 | + ) |
|
239 | + ) { |
|
240 | + $out .= $char; |
|
241 | + } |
|
242 | + // initialize UTF8 cache (reset) |
|
243 | + $mState = 0; |
|
244 | + $mUcs4 = 0; |
|
245 | + $mBytes = 1; |
|
246 | + $char = ''; |
|
247 | + } |
|
248 | + } else { |
|
249 | + // ((0xC0 & (*in) != 0x80) && (mState != 0)) |
|
250 | + // Incomplete multi-octet sequence. |
|
251 | + // used to result in complete fail, but we'll reset |
|
252 | + $mState = 0; |
|
253 | + $mUcs4 = 0; |
|
254 | + $mBytes = 1; |
|
255 | + $char =''; |
|
256 | + } |
|
257 | + } |
|
258 | + } |
|
259 | + return $out; |
|
260 | + } |
|
261 | + |
|
262 | + /** |
|
263 | + * Translates a Unicode codepoint into its corresponding UTF-8 character. |
|
264 | + * @note Based on Feyd's function at |
|
265 | + * <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>, |
|
266 | + * which is in public domain. |
|
267 | + * @note While we're going to do code point parsing anyway, a good |
|
268 | + * optimization would be to refuse to translate code points that |
|
269 | + * are non-SGML characters. However, this could lead to duplication. |
|
270 | + * @note This is very similar to the unichr function in |
|
271 | + * maintenance/generate-entity-file.php (although this is superior, |
|
272 | + * due to its sanity checks). |
|
273 | + */ |
|
274 | + |
|
275 | + // +----------+----------+----------+----------+ |
|
276 | + // | 33222222 | 22221111 | 111111 | | |
|
277 | + // | 10987654 | 32109876 | 54321098 | 76543210 | bit |
|
278 | + // +----------+----------+----------+----------+ |
|
279 | + // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F |
|
280 | + // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF |
|
281 | + // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF |
|
282 | + // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF |
|
283 | + // +----------+----------+----------+----------+ |
|
284 | + // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) |
|
285 | + // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes |
|
286 | + // +----------+----------+----------+----------+ |
|
287 | + |
|
288 | + public static function unichr($code) { |
|
289 | + if($code > 1114111 or $code < 0 or |
|
290 | + ($code >= 55296 and $code <= 57343) ) { |
|
291 | + // bits are set outside the "valid" range as defined |
|
292 | + // by UNICODE 4.1.0 |
|
293 | + return ''; |
|
294 | + } |
|
295 | + |
|
296 | + $x = $y = $z = $w = 0; |
|
297 | + if ($code < 128) { |
|
298 | + // regular ASCII character |
|
299 | + $x = $code; |
|
300 | + } else { |
|
301 | + // set up bits for UTF-8 |
|
302 | + $x = ($code & 63) | 128; |
|
303 | + if ($code < 2048) { |
|
304 | + $y = (($code & 2047) >> 6) | 192; |
|
305 | + } else { |
|
306 | + $y = (($code & 4032) >> 6) | 128; |
|
307 | + if($code < 65536) { |
|
308 | + $z = (($code >> 12) & 15) | 224; |
|
309 | + } else { |
|
310 | + $z = (($code >> 12) & 63) | 128; |
|
311 | + $w = (($code >> 18) & 7) | 240; |
|
312 | + } |
|
313 | + } |
|
314 | + } |
|
315 | + // set up the actual character |
|
316 | + $ret = ''; |
|
317 | + if($w) $ret .= chr($w); |
|
318 | + if($z) $ret .= chr($z); |
|
319 | + if($y) $ret .= chr($y); |
|
320 | + $ret .= chr($x); |
|
321 | + |
|
322 | + return $ret; |
|
323 | + } |
|
324 | + |
|
325 | + public static function iconvAvailable() { |
|
326 | + static $iconv = null; |
|
327 | + if ($iconv === null) { |
|
328 | + $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE; |
|
329 | + } |
|
330 | + return $iconv; |
|
331 | + } |
|
332 | + |
|
333 | + /** |
|
334 | + * Converts a string to UTF-8 based on configuration. |
|
335 | + */ |
|
336 | + public static function convertToUTF8($str, $config, $context) { |
|
337 | + $encoding = $config->get('Core.Encoding'); |
|
338 | + if ($encoding === 'utf-8') return $str; |
|
339 | + static $iconv = null; |
|
340 | + if ($iconv === null) $iconv = self::iconvAvailable(); |
|
341 | + if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
342 | + // unaffected by bugs, since UTF-8 support all characters |
|
343 | + $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
|
344 | + if ($str === false) { |
|
345 | + // $encoding is not a valid encoding |
|
346 | + trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); |
|
347 | + return ''; |
|
348 | + } |
|
349 | + // If the string is bjorked by Shift_JIS or a similar encoding |
|
350 | + // that doesn't support all of ASCII, convert the naughty |
|
351 | + // characters to their true byte-wise ASCII/UTF-8 equivalents. |
|
352 | + $str = strtr($str, self::testEncodingSupportsASCII($encoding)); |
|
353 | + return $str; |
|
354 | + } elseif ($encoding === 'iso-8859-1') { |
|
355 | + $str = utf8_encode($str); |
|
356 | + return $str; |
|
357 | + } |
|
358 | + trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); |
|
359 | + } |
|
360 | + |
|
361 | + /** |
|
362 | + * Converts a string from UTF-8 based on configuration. |
|
363 | + * @note Currently, this is a lossy conversion, with unexpressable |
|
364 | + * characters being omitted. |
|
365 | + */ |
|
366 | + public static function convertFromUTF8($str, $config, $context) { |
|
367 | + $encoding = $config->get('Core.Encoding'); |
|
368 | + if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { |
|
369 | + $str = self::convertToASCIIDumbLossless($str); |
|
370 | + } |
|
371 | + if ($encoding === 'utf-8') return $str; |
|
372 | + static $iconv = null; |
|
373 | + if ($iconv === null) $iconv = self::iconvAvailable(); |
|
374 | + if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
375 | + // Undo our previous fix in convertToUTF8, otherwise iconv will barf |
|
376 | + $ascii_fix = self::testEncodingSupportsASCII($encoding); |
|
377 | + if (!$escape && !empty($ascii_fix)) { |
|
378 | + $clear_fix = array(); |
|
379 | + foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; |
|
380 | + $str = strtr($str, $clear_fix); |
|
381 | + } |
|
382 | + $str = strtr($str, array_flip($ascii_fix)); |
|
383 | + // Normal stuff |
|
384 | + $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); |
|
385 | + return $str; |
|
386 | + } elseif ($encoding === 'iso-8859-1') { |
|
387 | + $str = utf8_decode($str); |
|
388 | + return $str; |
|
389 | + } |
|
390 | + trigger_error('Encoding not supported', E_USER_ERROR); |
|
391 | + // You might be tempted to assume that the ASCII representation |
|
392 | + // might be OK, however, this is *not* universally true over all |
|
393 | + // encodings. So we take the conservative route here, rather |
|
394 | + // than forcibly turn on %Core.EscapeNonASCIICharacters |
|
395 | + } |
|
396 | + |
|
397 | + /** |
|
398 | + * Lossless (character-wise) conversion of HTML to ASCII |
|
399 | + * @param $str UTF-8 string to be converted to ASCII |
|
400 | + * @returns ASCII encoded string with non-ASCII character entity-ized |
|
401 | + * @warning Adapted from MediaWiki, claiming fair use: this is a common |
|
402 | + * algorithm. If you disagree with this license fudgery, |
|
403 | + * implement it yourself. |
|
404 | + * @note Uses decimal numeric entities since they are best supported. |
|
405 | + * @note This is a DUMB function: it has no concept of keeping |
|
406 | + * character entities that the projected character encoding |
|
407 | + * can allow. We could possibly implement a smart version |
|
408 | + * but that would require it to also know which Unicode |
|
409 | + * codepoints the charset supported (not an easy task). |
|
410 | + * @note Sort of with cleanUTF8() but it assumes that $str is |
|
411 | + * well-formed UTF-8 |
|
412 | + */ |
|
413 | + public static function convertToASCIIDumbLossless($str) { |
|
414 | + $bytesleft = 0; |
|
415 | + $result = ''; |
|
416 | + $working = 0; |
|
417 | + $len = strlen($str); |
|
418 | + for( $i = 0; $i < $len; $i++ ) { |
|
419 | + $bytevalue = ord( $str[$i] ); |
|
420 | + if( $bytevalue <= 0x7F ) { //0xxx xxxx |
|
421 | + $result .= chr( $bytevalue ); |
|
422 | + $bytesleft = 0; |
|
423 | + } elseif( $bytevalue <= 0xBF ) { //10xx xxxx |
|
424 | + $working = $working << 6; |
|
425 | + $working += ($bytevalue & 0x3F); |
|
426 | + $bytesleft--; |
|
427 | + if( $bytesleft <= 0 ) { |
|
428 | + $result .= "&#" . $working . ";"; |
|
429 | + } |
|
430 | + } elseif( $bytevalue <= 0xDF ) { //110x xxxx |
|
431 | + $working = $bytevalue & 0x1F; |
|
432 | + $bytesleft = 1; |
|
433 | + } elseif( $bytevalue <= 0xEF ) { //1110 xxxx |
|
434 | + $working = $bytevalue & 0x0F; |
|
435 | + $bytesleft = 2; |
|
436 | + } else { //1111 0xxx |
|
437 | + $working = $bytevalue & 0x07; |
|
438 | + $bytesleft = 3; |
|
439 | + } |
|
440 | + } |
|
441 | + return $result; |
|
442 | + } |
|
443 | + |
|
444 | + /** No bugs detected in iconv. */ |
|
445 | + const ICONV_OK = 0; |
|
446 | + |
|
447 | + /** Iconv truncates output if converting from UTF-8 to another |
|
448 | + * character set with //IGNORE, and a non-encodable character is found */ |
|
449 | + const ICONV_TRUNCATES = 1; |
|
450 | + |
|
451 | + /** Iconv does not support //IGNORE, making it unusable for |
|
452 | + * transcoding purposes */ |
|
453 | + const ICONV_UNUSABLE = 2; |
|
454 | + |
|
455 | + /** |
|
456 | + * glibc iconv has a known bug where it doesn't handle the magic |
|
457 | + * //IGNORE stanza correctly. In particular, rather than ignore |
|
458 | + * characters, it will return an EILSEQ after consuming some number |
|
459 | + * of characters, and expect you to restart iconv as if it were |
|
460 | + * an E2BIG. Old versions of PHP did not respect the errno, and |
|
461 | + * returned the fragment, so as a result you would see iconv |
|
462 | + * mysteriously truncating output. We can work around this by |
|
463 | + * manually chopping our input into segments of about 8000 |
|
464 | + * characters, as long as PHP ignores the error code. If PHP starts |
|
465 | + * paying attention to the error code, iconv becomes unusable. |
|
466 | + * |
|
467 | + * @returns Error code indicating severity of bug. |
|
468 | + */ |
|
469 | + public static function testIconvTruncateBug() { |
|
470 | + static $code = null; |
|
471 | + if ($code === null) { |
|
472 | + // better not use iconv, otherwise infinite loop! |
|
473 | + $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); |
|
474 | + if ($r === false) { |
|
475 | + $code = self::ICONV_UNUSABLE; |
|
476 | + } elseif (($c = strlen($r)) < 9000) { |
|
477 | + $code = self::ICONV_TRUNCATES; |
|
478 | + } elseif ($c > 9000) { |
|
479 | + trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR); |
|
480 | + } else { |
|
481 | + $code = self::ICONV_OK; |
|
482 | + } |
|
483 | + } |
|
484 | + return $code; |
|
485 | + } |
|
486 | + |
|
487 | + /** |
|
488 | + * This expensive function tests whether or not a given character |
|
489 | + * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will |
|
490 | + * fail this test, and require special processing. Variable width |
|
491 | + * encodings shouldn't ever fail. |
|
492 | + * |
|
493 | + * @param string $encoding Encoding name to test, as per iconv format |
|
494 | + * @param bool $bypass Whether or not to bypass the precompiled arrays. |
|
495 | + * @return Array of UTF-8 characters to their corresponding ASCII, |
|
496 | + * which can be used to "undo" any overzealous iconv action. |
|
497 | + */ |
|
498 | + public static function testEncodingSupportsASCII($encoding, $bypass = false) { |
|
499 | + // All calls to iconv here are unsafe, proof by case analysis: |
|
500 | + // If ICONV_OK, no difference. |
|
501 | + // If ICONV_TRUNCATE, all calls involve one character inputs, |
|
502 | + // so bug is not triggered. |
|
503 | + // If ICONV_UNUSABLE, this call is irrelevant |
|
504 | + static $encodings = array(); |
|
505 | + if (!$bypass) { |
|
506 | + if (isset($encodings[$encoding])) return $encodings[$encoding]; |
|
507 | + $lenc = strtolower($encoding); |
|
508 | + switch ($lenc) { |
|
509 | + case 'shift_jis': |
|
510 | + return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'); |
|
511 | + case 'johab': |
|
512 | + return array("\xE2\x82\xA9" => '\\'); |
|
513 | + } |
|
514 | + if (strpos($lenc, 'iso-8859-') === 0) return array(); |
|
515 | + } |
|
516 | + $ret = array(); |
|
517 | + if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; |
|
518 | + for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars |
|
519 | + $c = chr($i); // UTF-8 char |
|
520 | + $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion |
|
521 | + if ( |
|
522 | + $r === '' || |
|
523 | + // This line is needed for iconv implementations that do not |
|
524 | + // omit characters that do not exist in the target character set |
|
525 | + ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c) |
|
526 | + ) { |
|
527 | + // Reverse engineer: what's the UTF-8 equiv of this byte |
|
528 | + // sequence? This assumes that there's no variable width |
|
529 | + // encoding that doesn't support ASCII. |
|
530 | + $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c; |
|
531 | + } |
|
532 | + } |
|
533 | + $encodings[$encoding] = $ret; |
|
534 | + return $ret; |
|
535 | + } |
|
536 | 536 | |
537 | 537 | |
538 | 538 | } |
@@ -314,9 +314,15 @@ discard block |
||
314 | 314 | } |
315 | 315 | // set up the actual character |
316 | 316 | $ret = ''; |
317 | - if($w) $ret .= chr($w); |
|
318 | - if($z) $ret .= chr($z); |
|
319 | - if($y) $ret .= chr($y); |
|
317 | + if($w) { |
|
318 | + $ret .= chr($w); |
|
319 | + } |
|
320 | + if($z) { |
|
321 | + $ret .= chr($z); |
|
322 | + } |
|
323 | + if($y) { |
|
324 | + $ret .= chr($y); |
|
325 | + } |
|
320 | 326 | $ret .= chr($x); |
321 | 327 | |
322 | 328 | return $ret; |
@@ -335,9 +341,13 @@ discard block |
||
335 | 341 | */ |
336 | 342 | public static function convertToUTF8($str, $config, $context) { |
337 | 343 | $encoding = $config->get('Core.Encoding'); |
338 | - if ($encoding === 'utf-8') return $str; |
|
344 | + if ($encoding === 'utf-8') { |
|
345 | + return $str; |
|
346 | + } |
|
339 | 347 | static $iconv = null; |
340 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
348 | + if ($iconv === null) { |
|
349 | + $iconv = self::iconvAvailable(); |
|
350 | + } |
|
341 | 351 | if ($iconv && !$config->get('Test.ForceNoIconv')) { |
342 | 352 | // unaffected by bugs, since UTF-8 support all characters |
343 | 353 | $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
@@ -368,15 +378,21 @@ discard block |
||
368 | 378 | if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { |
369 | 379 | $str = self::convertToASCIIDumbLossless($str); |
370 | 380 | } |
371 | - if ($encoding === 'utf-8') return $str; |
|
381 | + if ($encoding === 'utf-8') { |
|
382 | + return $str; |
|
383 | + } |
|
372 | 384 | static $iconv = null; |
373 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
385 | + if ($iconv === null) { |
|
386 | + $iconv = self::iconvAvailable(); |
|
387 | + } |
|
374 | 388 | if ($iconv && !$config->get('Test.ForceNoIconv')) { |
375 | 389 | // Undo our previous fix in convertToUTF8, otherwise iconv will barf |
376 | 390 | $ascii_fix = self::testEncodingSupportsASCII($encoding); |
377 | 391 | if (!$escape && !empty($ascii_fix)) { |
378 | 392 | $clear_fix = array(); |
379 | - foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; |
|
393 | + foreach ($ascii_fix as $utf8 => $native) { |
|
394 | + $clear_fix[$utf8] = ''; |
|
395 | + } |
|
380 | 396 | $str = strtr($str, $clear_fix); |
381 | 397 | } |
382 | 398 | $str = strtr($str, array_flip($ascii_fix)); |
@@ -503,7 +519,9 @@ discard block |
||
503 | 519 | // If ICONV_UNUSABLE, this call is irrelevant |
504 | 520 | static $encodings = array(); |
505 | 521 | if (!$bypass) { |
506 | - if (isset($encodings[$encoding])) return $encodings[$encoding]; |
|
522 | + if (isset($encodings[$encoding])) { |
|
523 | + return $encodings[$encoding]; |
|
524 | + } |
|
507 | 525 | $lenc = strtolower($encoding); |
508 | 526 | switch ($lenc) { |
509 | 527 | case 'shift_jis': |
@@ -511,10 +529,14 @@ discard block |
||
511 | 529 | case 'johab': |
512 | 530 | return array("\xE2\x82\xA9" => '\\'); |
513 | 531 | } |
514 | - if (strpos($lenc, 'iso-8859-') === 0) return array(); |
|
532 | + if (strpos($lenc, 'iso-8859-') === 0) { |
|
533 | + return array(); |
|
534 | + } |
|
515 | 535 | } |
516 | 536 | $ret = array(); |
517 | - if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; |
|
537 | + if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) { |
|
538 | + return false; |
|
539 | + } |
|
518 | 540 | for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars |
519 | 541 | $c = chr($i); // UTF-8 char |
520 | 542 | $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion |
@@ -132,7 +132,7 @@ discard block |
||
132 | 132 | $char = ''; |
133 | 133 | |
134 | 134 | $len = strlen($str); |
135 | - for($i = 0; $i < $len; $i++) { |
|
135 | + for ($i = 0; $i < $len; $i++) { |
|
136 | 136 | $in = ord($str{$i}); |
137 | 137 | $char .= $str[$i]; // append byte to char |
138 | 138 | if (0 == $mState) { |
@@ -252,7 +252,7 @@ discard block |
||
252 | 252 | $mState = 0; |
253 | 253 | $mUcs4 = 0; |
254 | 254 | $mBytes = 1; |
255 | - $char =''; |
|
255 | + $char = ''; |
|
256 | 256 | } |
257 | 257 | } |
258 | 258 | } |
@@ -286,8 +286,8 @@ discard block |
||
286 | 286 | // +----------+----------+----------+----------+ |
287 | 287 | |
288 | 288 | public static function unichr($code) { |
289 | - if($code > 1114111 or $code < 0 or |
|
290 | - ($code >= 55296 and $code <= 57343) ) { |
|
289 | + if ($code > 1114111 or $code < 0 or |
|
290 | + ($code >= 55296 and $code <= 57343)) { |
|
291 | 291 | // bits are set outside the "valid" range as defined |
292 | 292 | // by UNICODE 4.1.0 |
293 | 293 | return ''; |
@@ -304,19 +304,19 @@ discard block |
||
304 | 304 | $y = (($code & 2047) >> 6) | 192; |
305 | 305 | } else { |
306 | 306 | $y = (($code & 4032) >> 6) | 128; |
307 | - if($code < 65536) { |
|
307 | + if ($code < 65536) { |
|
308 | 308 | $z = (($code >> 12) & 15) | 224; |
309 | 309 | } else { |
310 | 310 | $z = (($code >> 12) & 63) | 128; |
311 | - $w = (($code >> 18) & 7) | 240; |
|
311 | + $w = (($code >> 18) & 7) | 240; |
|
312 | 312 | } |
313 | 313 | } |
314 | 314 | } |
315 | 315 | // set up the actual character |
316 | 316 | $ret = ''; |
317 | - if($w) $ret .= chr($w); |
|
318 | - if($z) $ret .= chr($z); |
|
319 | - if($y) $ret .= chr($y); |
|
317 | + if ($w) $ret .= chr($w); |
|
318 | + if ($z) $ret .= chr($z); |
|
319 | + if ($y) $ret .= chr($y); |
|
320 | 320 | $ret .= chr($x); |
321 | 321 | |
322 | 322 | return $ret; |
@@ -343,7 +343,7 @@ discard block |
||
343 | 343 | $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
344 | 344 | if ($str === false) { |
345 | 345 | // $encoding is not a valid encoding |
346 | - trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); |
|
346 | + trigger_error('Invalid encoding '.$encoding, E_USER_ERROR); |
|
347 | 347 | return ''; |
348 | 348 | } |
349 | 349 | // If the string is bjorked by Shift_JIS or a similar encoding |
@@ -381,7 +381,7 @@ discard block |
||
381 | 381 | } |
382 | 382 | $str = strtr($str, array_flip($ascii_fix)); |
383 | 383 | // Normal stuff |
384 | - $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); |
|
384 | + $str = self::iconv('utf-8', $encoding.'//IGNORE', $str); |
|
385 | 385 | return $str; |
386 | 386 | } elseif ($encoding === 'iso-8859-1') { |
387 | 387 | $str = utf8_decode($str); |
@@ -415,22 +415,22 @@ discard block |
||
415 | 415 | $result = ''; |
416 | 416 | $working = 0; |
417 | 417 | $len = strlen($str); |
418 | - for( $i = 0; $i < $len; $i++ ) { |
|
419 | - $bytevalue = ord( $str[$i] ); |
|
420 | - if( $bytevalue <= 0x7F ) { //0xxx xxxx |
|
421 | - $result .= chr( $bytevalue ); |
|
418 | + for ($i = 0; $i < $len; $i++) { |
|
419 | + $bytevalue = ord($str[$i]); |
|
420 | + if ($bytevalue <= 0x7F) { //0xxx xxxx |
|
421 | + $result .= chr($bytevalue); |
|
422 | 422 | $bytesleft = 0; |
423 | - } elseif( $bytevalue <= 0xBF ) { //10xx xxxx |
|
423 | + } elseif ($bytevalue <= 0xBF) { //10xx xxxx |
|
424 | 424 | $working = $working << 6; |
425 | 425 | $working += ($bytevalue & 0x3F); |
426 | 426 | $bytesleft--; |
427 | - if( $bytesleft <= 0 ) { |
|
428 | - $result .= "&#" . $working . ";"; |
|
427 | + if ($bytesleft <= 0) { |
|
428 | + $result .= "&#".$working.";"; |
|
429 | 429 | } |
430 | - } elseif( $bytevalue <= 0xDF ) { //110x xxxx |
|
430 | + } elseif ($bytevalue <= 0xDF) { //110x xxxx |
|
431 | 431 | $working = $bytevalue & 0x1F; |
432 | 432 | $bytesleft = 1; |
433 | - } elseif( $bytevalue <= 0xEF ) { //1110 xxxx |
|
433 | + } elseif ($bytevalue <= 0xEF) { //1110 xxxx |
|
434 | 434 | $working = $bytevalue & 0x0F; |
435 | 435 | $bytesleft = 2; |
436 | 436 | } else { //1111 0xxx |
@@ -470,7 +470,7 @@ discard block |
||
470 | 470 | static $code = null; |
471 | 471 | if ($code === null) { |
472 | 472 | // better not use iconv, otherwise infinite loop! |
473 | - $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); |
|
473 | + $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1".str_repeat('a', 9000)); |
|
474 | 474 | if ($r === false) { |
475 | 475 | $code = self::ICONV_UNUSABLE; |
476 | 476 | } elseif (($c = strlen($r)) < 9000) { |
@@ -25,6 +25,9 @@ |
||
25 | 25 | |
26 | 26 | protected $lines = array(); |
27 | 27 | |
28 | + /** |
|
29 | + * @param HTMLPurifier_Context $context |
|
30 | + */ |
|
28 | 31 | public function __construct($context) { |
29 | 32 | $this->locale =& $context->get('Locale'); |
30 | 33 | $this->context = $context; |
@@ -7,202 +7,202 @@ |
||
7 | 7 | class HTMLPurifier_ErrorCollector |
8 | 8 | { |
9 | 9 | |
10 | - /** |
|
11 | - * Identifiers for the returned error array. These are purposely numeric |
|
12 | - * so list() can be used. |
|
13 | - */ |
|
14 | - const LINENO = 0; |
|
15 | - const SEVERITY = 1; |
|
16 | - const MESSAGE = 2; |
|
17 | - const CHILDREN = 3; |
|
18 | - |
|
19 | - protected $errors; |
|
20 | - protected $_current; |
|
21 | - protected $_stacks = array(array()); |
|
22 | - protected $locale; |
|
23 | - protected $generator; |
|
24 | - protected $context; |
|
25 | - |
|
26 | - protected $lines = array(); |
|
27 | - |
|
28 | - public function __construct($context) { |
|
29 | - $this->locale =& $context->get('Locale'); |
|
30 | - $this->context = $context; |
|
31 | - $this->_current =& $this->_stacks[0]; |
|
32 | - $this->errors =& $this->_stacks[0]; |
|
33 | - } |
|
34 | - |
|
35 | - /** |
|
36 | - * Sends an error message to the collector for later use |
|
37 | - * @param $severity int Error severity, PHP error style (don't use E_USER_) |
|
38 | - * @param $msg string Error message text |
|
39 | - * @param $subst1 string First substitution for $msg |
|
40 | - * @param $subst2 string ... |
|
41 | - */ |
|
42 | - public function send($severity, $msg) { |
|
43 | - |
|
44 | - $args = array(); |
|
45 | - if (func_num_args() > 2) { |
|
46 | - $args = func_get_args(); |
|
47 | - array_shift($args); |
|
48 | - unset($args[0]); |
|
49 | - } |
|
50 | - |
|
51 | - $token = $this->context->get('CurrentToken', true); |
|
52 | - $line = $token ? $token->line : $this->context->get('CurrentLine', true); |
|
53 | - $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
54 | - $attr = $this->context->get('CurrentAttr', true); |
|
55 | - |
|
56 | - // perform special substitutions, also add custom parameters |
|
57 | - $subst = array(); |
|
58 | - if (!is_null($token)) { |
|
59 | - $args['CurrentToken'] = $token; |
|
60 | - } |
|
61 | - if (!is_null($attr)) { |
|
62 | - $subst['$CurrentAttr.Name'] = $attr; |
|
63 | - if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
64 | - } |
|
65 | - |
|
66 | - if (empty($args)) { |
|
67 | - $msg = $this->locale->getMessage($msg); |
|
68 | - } else { |
|
69 | - $msg = $this->locale->formatMessage($msg, $args); |
|
70 | - } |
|
71 | - |
|
72 | - if (!empty($subst)) $msg = strtr($msg, $subst); |
|
73 | - |
|
74 | - // (numerically indexed) |
|
75 | - $error = array( |
|
76 | - self::LINENO => $line, |
|
77 | - self::SEVERITY => $severity, |
|
78 | - self::MESSAGE => $msg, |
|
79 | - self::CHILDREN => array() |
|
80 | - ); |
|
81 | - $this->_current[] = $error; |
|
82 | - |
|
83 | - |
|
84 | - // NEW CODE BELOW ... |
|
85 | - |
|
86 | - $struct = null; |
|
87 | - // Top-level errors are either: |
|
88 | - // TOKEN type, if $value is set appropriately, or |
|
89 | - // "syntax" type, if $value is null |
|
90 | - $new_struct = new HTMLPurifier_ErrorStruct(); |
|
91 | - $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; |
|
92 | - if ($token) $new_struct->value = clone $token; |
|
93 | - if (is_int($line) && is_int($col)) { |
|
94 | - if (isset($this->lines[$line][$col])) { |
|
95 | - $struct = $this->lines[$line][$col]; |
|
96 | - } else { |
|
97 | - $struct = $this->lines[$line][$col] = $new_struct; |
|
98 | - } |
|
99 | - // These ksorts may present a performance problem |
|
100 | - ksort($this->lines[$line], SORT_NUMERIC); |
|
101 | - } else { |
|
102 | - if (isset($this->lines[-1])) { |
|
103 | - $struct = $this->lines[-1]; |
|
104 | - } else { |
|
105 | - $struct = $this->lines[-1] = $new_struct; |
|
106 | - } |
|
107 | - } |
|
108 | - ksort($this->lines, SORT_NUMERIC); |
|
109 | - |
|
110 | - // Now, check if we need to operate on a lower structure |
|
111 | - if (!empty($attr)) { |
|
112 | - $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); |
|
113 | - if (!$struct->value) { |
|
114 | - $struct->value = array($attr, 'PUT VALUE HERE'); |
|
115 | - } |
|
116 | - } |
|
117 | - if (!empty($cssprop)) { |
|
118 | - $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); |
|
119 | - if (!$struct->value) { |
|
120 | - // if we tokenize CSS this might be a little more difficult to do |
|
121 | - $struct->value = array($cssprop, 'PUT VALUE HERE'); |
|
122 | - } |
|
123 | - } |
|
124 | - |
|
125 | - // Ok, structs are all setup, now time to register the error |
|
126 | - $struct->addError($severity, $msg); |
|
127 | - } |
|
128 | - |
|
129 | - /** |
|
130 | - * Retrieves raw error data for custom formatter to use |
|
131 | - * @param List of arrays in format of array(line of error, |
|
132 | - * error severity, error message, |
|
133 | - * recursive sub-errors array) |
|
134 | - */ |
|
135 | - public function getRaw() { |
|
136 | - return $this->errors; |
|
137 | - } |
|
138 | - |
|
139 | - /** |
|
140 | - * Default HTML formatting implementation for error messages |
|
141 | - * @param $config Configuration array, vital for HTML output nature |
|
142 | - * @param $errors Errors array to display; used for recursion. |
|
143 | - */ |
|
144 | - public function getHTMLFormatted($config, $errors = null) { |
|
145 | - $ret = array(); |
|
146 | - |
|
147 | - $this->generator = new HTMLPurifier_Generator($config, $this->context); |
|
148 | - if ($errors === null) $errors = $this->errors; |
|
149 | - |
|
150 | - // 'At line' message needs to be removed |
|
151 | - |
|
152 | - // generation code for new structure goes here. It needs to be recursive. |
|
153 | - foreach ($this->lines as $line => $col_array) { |
|
154 | - if ($line == -1) continue; |
|
155 | - foreach ($col_array as $col => $struct) { |
|
156 | - $this->_renderStruct($ret, $struct, $line, $col); |
|
157 | - } |
|
158 | - } |
|
159 | - if (isset($this->lines[-1])) { |
|
160 | - $this->_renderStruct($ret, $this->lines[-1]); |
|
161 | - } |
|
162 | - |
|
163 | - if (empty($errors)) { |
|
164 | - return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>'; |
|
165 | - } else { |
|
166 | - return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>'; |
|
167 | - } |
|
168 | - |
|
169 | - } |
|
170 | - |
|
171 | - private function _renderStruct(&$ret, $struct, $line = null, $col = null) { |
|
172 | - $stack = array($struct); |
|
173 | - $context_stack = array(array()); |
|
174 | - while ($current = array_pop($stack)) { |
|
175 | - $context = array_pop($context_stack); |
|
176 | - foreach ($current->errors as $error) { |
|
177 | - list($severity, $msg) = $error; |
|
178 | - $string = ''; |
|
179 | - $string .= '<div>'; |
|
180 | - // W3C uses an icon to indicate the severity of the error. |
|
181 | - $error = $this->locale->getErrorName($severity); |
|
182 | - $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> "; |
|
183 | - if (!is_null($line) && !is_null($col)) { |
|
184 | - $string .= "<em class=\"location\">Line $line, Column $col: </em> "; |
|
185 | - } else { |
|
186 | - $string .= '<em class="location">End of Document: </em> '; |
|
187 | - } |
|
188 | - $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; |
|
189 | - $string .= '</div>'; |
|
190 | - // Here, have a marker for the character on the column appropriate. |
|
191 | - // Be sure to clip extremely long lines. |
|
192 | - //$string .= '<pre>'; |
|
193 | - //$string .= ''; |
|
194 | - //$string .= '</pre>'; |
|
195 | - $ret[] = $string; |
|
196 | - } |
|
197 | - foreach ($current->children as $type => $array) { |
|
198 | - $context[] = $current; |
|
199 | - $stack = array_merge($stack, array_reverse($array, true)); |
|
200 | - for ($i = count($array); $i > 0; $i--) { |
|
201 | - $context_stack[] = $context; |
|
202 | - } |
|
203 | - } |
|
204 | - } |
|
205 | - } |
|
10 | + /** |
|
11 | + * Identifiers for the returned error array. These are purposely numeric |
|
12 | + * so list() can be used. |
|
13 | + */ |
|
14 | + const LINENO = 0; |
|
15 | + const SEVERITY = 1; |
|
16 | + const MESSAGE = 2; |
|
17 | + const CHILDREN = 3; |
|
18 | + |
|
19 | + protected $errors; |
|
20 | + protected $_current; |
|
21 | + protected $_stacks = array(array()); |
|
22 | + protected $locale; |
|
23 | + protected $generator; |
|
24 | + protected $context; |
|
25 | + |
|
26 | + protected $lines = array(); |
|
27 | + |
|
28 | + public function __construct($context) { |
|
29 | + $this->locale =& $context->get('Locale'); |
|
30 | + $this->context = $context; |
|
31 | + $this->_current =& $this->_stacks[0]; |
|
32 | + $this->errors =& $this->_stacks[0]; |
|
33 | + } |
|
34 | + |
|
35 | + /** |
|
36 | + * Sends an error message to the collector for later use |
|
37 | + * @param $severity int Error severity, PHP error style (don't use E_USER_) |
|
38 | + * @param $msg string Error message text |
|
39 | + * @param $subst1 string First substitution for $msg |
|
40 | + * @param $subst2 string ... |
|
41 | + */ |
|
42 | + public function send($severity, $msg) { |
|
43 | + |
|
44 | + $args = array(); |
|
45 | + if (func_num_args() > 2) { |
|
46 | + $args = func_get_args(); |
|
47 | + array_shift($args); |
|
48 | + unset($args[0]); |
|
49 | + } |
|
50 | + |
|
51 | + $token = $this->context->get('CurrentToken', true); |
|
52 | + $line = $token ? $token->line : $this->context->get('CurrentLine', true); |
|
53 | + $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
54 | + $attr = $this->context->get('CurrentAttr', true); |
|
55 | + |
|
56 | + // perform special substitutions, also add custom parameters |
|
57 | + $subst = array(); |
|
58 | + if (!is_null($token)) { |
|
59 | + $args['CurrentToken'] = $token; |
|
60 | + } |
|
61 | + if (!is_null($attr)) { |
|
62 | + $subst['$CurrentAttr.Name'] = $attr; |
|
63 | + if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
64 | + } |
|
65 | + |
|
66 | + if (empty($args)) { |
|
67 | + $msg = $this->locale->getMessage($msg); |
|
68 | + } else { |
|
69 | + $msg = $this->locale->formatMessage($msg, $args); |
|
70 | + } |
|
71 | + |
|
72 | + if (!empty($subst)) $msg = strtr($msg, $subst); |
|
73 | + |
|
74 | + // (numerically indexed) |
|
75 | + $error = array( |
|
76 | + self::LINENO => $line, |
|
77 | + self::SEVERITY => $severity, |
|
78 | + self::MESSAGE => $msg, |
|
79 | + self::CHILDREN => array() |
|
80 | + ); |
|
81 | + $this->_current[] = $error; |
|
82 | + |
|
83 | + |
|
84 | + // NEW CODE BELOW ... |
|
85 | + |
|
86 | + $struct = null; |
|
87 | + // Top-level errors are either: |
|
88 | + // TOKEN type, if $value is set appropriately, or |
|
89 | + // "syntax" type, if $value is null |
|
90 | + $new_struct = new HTMLPurifier_ErrorStruct(); |
|
91 | + $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; |
|
92 | + if ($token) $new_struct->value = clone $token; |
|
93 | + if (is_int($line) && is_int($col)) { |
|
94 | + if (isset($this->lines[$line][$col])) { |
|
95 | + $struct = $this->lines[$line][$col]; |
|
96 | + } else { |
|
97 | + $struct = $this->lines[$line][$col] = $new_struct; |
|
98 | + } |
|
99 | + // These ksorts may present a performance problem |
|
100 | + ksort($this->lines[$line], SORT_NUMERIC); |
|
101 | + } else { |
|
102 | + if (isset($this->lines[-1])) { |
|
103 | + $struct = $this->lines[-1]; |
|
104 | + } else { |
|
105 | + $struct = $this->lines[-1] = $new_struct; |
|
106 | + } |
|
107 | + } |
|
108 | + ksort($this->lines, SORT_NUMERIC); |
|
109 | + |
|
110 | + // Now, check if we need to operate on a lower structure |
|
111 | + if (!empty($attr)) { |
|
112 | + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); |
|
113 | + if (!$struct->value) { |
|
114 | + $struct->value = array($attr, 'PUT VALUE HERE'); |
|
115 | + } |
|
116 | + } |
|
117 | + if (!empty($cssprop)) { |
|
118 | + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); |
|
119 | + if (!$struct->value) { |
|
120 | + // if we tokenize CSS this might be a little more difficult to do |
|
121 | + $struct->value = array($cssprop, 'PUT VALUE HERE'); |
|
122 | + } |
|
123 | + } |
|
124 | + |
|
125 | + // Ok, structs are all setup, now time to register the error |
|
126 | + $struct->addError($severity, $msg); |
|
127 | + } |
|
128 | + |
|
129 | + /** |
|
130 | + * Retrieves raw error data for custom formatter to use |
|
131 | + * @param List of arrays in format of array(line of error, |
|
132 | + * error severity, error message, |
|
133 | + * recursive sub-errors array) |
|
134 | + */ |
|
135 | + public function getRaw() { |
|
136 | + return $this->errors; |
|
137 | + } |
|
138 | + |
|
139 | + /** |
|
140 | + * Default HTML formatting implementation for error messages |
|
141 | + * @param $config Configuration array, vital for HTML output nature |
|
142 | + * @param $errors Errors array to display; used for recursion. |
|
143 | + */ |
|
144 | + public function getHTMLFormatted($config, $errors = null) { |
|
145 | + $ret = array(); |
|
146 | + |
|
147 | + $this->generator = new HTMLPurifier_Generator($config, $this->context); |
|
148 | + if ($errors === null) $errors = $this->errors; |
|
149 | + |
|
150 | + // 'At line' message needs to be removed |
|
151 | + |
|
152 | + // generation code for new structure goes here. It needs to be recursive. |
|
153 | + foreach ($this->lines as $line => $col_array) { |
|
154 | + if ($line == -1) continue; |
|
155 | + foreach ($col_array as $col => $struct) { |
|
156 | + $this->_renderStruct($ret, $struct, $line, $col); |
|
157 | + } |
|
158 | + } |
|
159 | + if (isset($this->lines[-1])) { |
|
160 | + $this->_renderStruct($ret, $this->lines[-1]); |
|
161 | + } |
|
162 | + |
|
163 | + if (empty($errors)) { |
|
164 | + return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>'; |
|
165 | + } else { |
|
166 | + return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>'; |
|
167 | + } |
|
168 | + |
|
169 | + } |
|
170 | + |
|
171 | + private function _renderStruct(&$ret, $struct, $line = null, $col = null) { |
|
172 | + $stack = array($struct); |
|
173 | + $context_stack = array(array()); |
|
174 | + while ($current = array_pop($stack)) { |
|
175 | + $context = array_pop($context_stack); |
|
176 | + foreach ($current->errors as $error) { |
|
177 | + list($severity, $msg) = $error; |
|
178 | + $string = ''; |
|
179 | + $string .= '<div>'; |
|
180 | + // W3C uses an icon to indicate the severity of the error. |
|
181 | + $error = $this->locale->getErrorName($severity); |
|
182 | + $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> "; |
|
183 | + if (!is_null($line) && !is_null($col)) { |
|
184 | + $string .= "<em class=\"location\">Line $line, Column $col: </em> "; |
|
185 | + } else { |
|
186 | + $string .= '<em class="location">End of Document: </em> '; |
|
187 | + } |
|
188 | + $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; |
|
189 | + $string .= '</div>'; |
|
190 | + // Here, have a marker for the character on the column appropriate. |
|
191 | + // Be sure to clip extremely long lines. |
|
192 | + //$string .= '<pre>'; |
|
193 | + //$string .= ''; |
|
194 | + //$string .= '</pre>'; |
|
195 | + $ret[] = $string; |
|
196 | + } |
|
197 | + foreach ($current->children as $type => $array) { |
|
198 | + $context[] = $current; |
|
199 | + $stack = array_merge($stack, array_reverse($array, true)); |
|
200 | + for ($i = count($array); $i > 0; $i--) { |
|
201 | + $context_stack[] = $context; |
|
202 | + } |
|
203 | + } |
|
204 | + } |
|
205 | + } |
|
206 | 206 | |
207 | 207 | } |
208 | 208 |
@@ -60,7 +60,9 @@ discard block |
||
60 | 60 | } |
61 | 61 | if (!is_null($attr)) { |
62 | 62 | $subst['$CurrentAttr.Name'] = $attr; |
63 | - if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
63 | + if (isset($token->attr[$attr])) { |
|
64 | + $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
65 | + } |
|
64 | 66 | } |
65 | 67 | |
66 | 68 | if (empty($args)) { |
@@ -69,7 +71,9 @@ discard block |
||
69 | 71 | $msg = $this->locale->formatMessage($msg, $args); |
70 | 72 | } |
71 | 73 | |
72 | - if (!empty($subst)) $msg = strtr($msg, $subst); |
|
74 | + if (!empty($subst)) { |
|
75 | + $msg = strtr($msg, $subst); |
|
76 | + } |
|
73 | 77 | |
74 | 78 | // (numerically indexed) |
75 | 79 | $error = array( |
@@ -89,7 +93,9 @@ discard block |
||
89 | 93 | // "syntax" type, if $value is null |
90 | 94 | $new_struct = new HTMLPurifier_ErrorStruct(); |
91 | 95 | $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; |
92 | - if ($token) $new_struct->value = clone $token; |
|
96 | + if ($token) { |
|
97 | + $new_struct->value = clone $token; |
|
98 | + } |
|
93 | 99 | if (is_int($line) && is_int($col)) { |
94 | 100 | if (isset($this->lines[$line][$col])) { |
95 | 101 | $struct = $this->lines[$line][$col]; |
@@ -145,13 +151,17 @@ discard block |
||
145 | 151 | $ret = array(); |
146 | 152 | |
147 | 153 | $this->generator = new HTMLPurifier_Generator($config, $this->context); |
148 | - if ($errors === null) $errors = $this->errors; |
|
154 | + if ($errors === null) { |
|
155 | + $errors = $this->errors; |
|
156 | + } |
|
149 | 157 | |
150 | 158 | // 'At line' message needs to be removed |
151 | 159 | |
152 | 160 | // generation code for new structure goes here. It needs to be recursive. |
153 | 161 | foreach ($this->lines as $line => $col_array) { |
154 | - if ($line == -1) continue; |
|
162 | + if ($line == -1) { |
|
163 | + continue; |
|
164 | + } |
|
155 | 165 | foreach ($col_array as $col => $struct) { |
156 | 166 | $this->_renderStruct($ret, $struct, $line, $col); |
157 | 167 | } |
@@ -26,10 +26,10 @@ discard block |
||
26 | 26 | protected $lines = array(); |
27 | 27 | |
28 | 28 | public function __construct($context) { |
29 | - $this->locale =& $context->get('Locale'); |
|
29 | + $this->locale = & $context->get('Locale'); |
|
30 | 30 | $this->context = $context; |
31 | - $this->_current =& $this->_stacks[0]; |
|
32 | - $this->errors =& $this->_stacks[0]; |
|
31 | + $this->_current = & $this->_stacks[0]; |
|
32 | + $this->errors = & $this->_stacks[0]; |
|
33 | 33 | } |
34 | 34 | |
35 | 35 | /** |
@@ -50,7 +50,7 @@ discard block |
||
50 | 50 | |
51 | 51 | $token = $this->context->get('CurrentToken', true); |
52 | 52 | $line = $token ? $token->line : $this->context->get('CurrentLine', true); |
53 | - $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
53 | + $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
54 | 54 | $attr = $this->context->get('CurrentAttr', true); |
55 | 55 | |
56 | 56 | // perform special substitutions, also add custom parameters |
@@ -161,9 +161,9 @@ discard block |
||
161 | 161 | } |
162 | 162 | |
163 | 163 | if (empty($errors)) { |
164 | - return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>'; |
|
164 | + return '<p>'.$this->locale->getMessage('ErrorCollector: No errors').'</p>'; |
|
165 | 165 | } else { |
166 | - return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>'; |
|
166 | + return '<ul><li>'.implode('</li><li>', $ret).'</li></ul>'; |
|
167 | 167 | } |
168 | 168 | |
169 | 169 | } |
@@ -185,7 +185,7 @@ discard block |
||
185 | 185 | } else { |
186 | 186 | $string .= '<em class="location">End of Document: </em> '; |
187 | 187 | } |
188 | - $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; |
|
188 | + $string .= '<strong class="description">'.$this->generator->escape($msg).'</strong> '; |
|
189 | 189 | $string .= '</div>'; |
190 | 190 | // Here, have a marker for the character on the column appropriate. |
191 | 191 | // Be sure to clip extremely long lines. |
@@ -70,7 +70,7 @@ discard block |
||
70 | 70 | * Generates HTML from an array of tokens. |
71 | 71 | * @param $tokens Array of HTMLPurifier_Token |
72 | 72 | * @param $config HTMLPurifier_Config object |
73 | - * @return Generated HTML |
|
73 | + * @return string HTML |
|
74 | 74 | */ |
75 | 75 | public function generateFromTokens($tokens) { |
76 | 76 | if (!$tokens) return ''; |
@@ -115,7 +115,7 @@ discard block |
||
115 | 115 | /** |
116 | 116 | * Generates HTML from a single token. |
117 | 117 | * @param $token HTMLPurifier_Token object. |
118 | - * @return Generated HTML |
|
118 | + * @return string HTML |
|
119 | 119 | */ |
120 | 120 | public function generateFromToken($token) { |
121 | 121 | if (!$token instanceof HTMLPurifier_Token) { |
@@ -181,7 +181,7 @@ discard block |
||
181 | 181 | * @param $assoc_array_of_attributes Attribute array |
182 | 182 | * @param $element Name of element attributes are for, used to check |
183 | 183 | * attribute minimization. |
184 | - * @return Generate HTML fragment for insertion. |
|
184 | + * @return string HTML fragment for insertion. |
|
185 | 185 | */ |
186 | 186 | public function generateAttributes($assoc_array_of_attributes, $element = false) { |
187 | 187 | $html = ''; |
@@ -238,7 +238,7 @@ discard block |
||
238 | 238 | * for properly generating HTML here w/o using tokens, it stays |
239 | 239 | * public. |
240 | 240 | * @param $string String data to escape for HTML. |
241 | - * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
241 | + * @param integer $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
242 | 242 | * permissible for non-attribute output. |
243 | 243 | * @return String escaped data. |
244 | 244 | */ |
@@ -10,244 +10,244 @@ |
||
10 | 10 | class HTMLPurifier_Generator |
11 | 11 | { |
12 | 12 | |
13 | - /** |
|
14 | - * Whether or not generator should produce XML output |
|
15 | - */ |
|
16 | - private $_xhtml = true; |
|
17 | - |
|
18 | - /** |
|
19 | - * :HACK: Whether or not generator should comment the insides of <script> tags |
|
20 | - */ |
|
21 | - private $_scriptFix = false; |
|
22 | - |
|
23 | - /** |
|
24 | - * Cache of HTMLDefinition during HTML output to determine whether or |
|
25 | - * not attributes should be minimized. |
|
26 | - */ |
|
27 | - private $_def; |
|
28 | - |
|
29 | - /** |
|
30 | - * Cache of %Output.SortAttr |
|
31 | - */ |
|
32 | - private $_sortAttr; |
|
33 | - |
|
34 | - /** |
|
35 | - * Cache of %Output.FlashCompat |
|
36 | - */ |
|
37 | - private $_flashCompat; |
|
38 | - |
|
39 | - /** |
|
40 | - * Cache of %Output.FixInnerHTML |
|
41 | - */ |
|
42 | - private $_innerHTMLFix; |
|
43 | - |
|
44 | - /** |
|
45 | - * Stack for keeping track of object information when outputting IE |
|
46 | - * compatibility code. |
|
47 | - */ |
|
48 | - private $_flashStack = array(); |
|
49 | - |
|
50 | - /** |
|
51 | - * Configuration for the generator |
|
52 | - */ |
|
53 | - protected $config; |
|
54 | - |
|
55 | - /** |
|
56 | - * @param $config Instance of HTMLPurifier_Config |
|
57 | - * @param $context Instance of HTMLPurifier_Context |
|
58 | - */ |
|
59 | - public function __construct($config, $context) { |
|
60 | - $this->config = $config; |
|
61 | - $this->_scriptFix = $config->get('Output.CommentScriptContents'); |
|
62 | - $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); |
|
63 | - $this->_sortAttr = $config->get('Output.SortAttr'); |
|
64 | - $this->_flashCompat = $config->get('Output.FlashCompat'); |
|
65 | - $this->_def = $config->getHTMLDefinition(); |
|
66 | - $this->_xhtml = $this->_def->doctype->xml; |
|
67 | - } |
|
68 | - |
|
69 | - /** |
|
70 | - * Generates HTML from an array of tokens. |
|
71 | - * @param $tokens Array of HTMLPurifier_Token |
|
72 | - * @param $config HTMLPurifier_Config object |
|
73 | - * @return Generated HTML |
|
74 | - */ |
|
75 | - public function generateFromTokens($tokens) { |
|
76 | - if (!$tokens) return ''; |
|
77 | - |
|
78 | - // Basic algorithm |
|
79 | - $html = ''; |
|
80 | - for ($i = 0, $size = count($tokens); $i < $size; $i++) { |
|
81 | - if ($this->_scriptFix && $tokens[$i]->name === 'script' |
|
82 | - && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { |
|
83 | - // script special case |
|
84 | - // the contents of the script block must be ONE token |
|
85 | - // for this to work. |
|
86 | - $html .= $this->generateFromToken($tokens[$i++]); |
|
87 | - $html .= $this->generateScriptFromToken($tokens[$i++]); |
|
88 | - } |
|
89 | - $html .= $this->generateFromToken($tokens[$i]); |
|
90 | - } |
|
91 | - |
|
92 | - // Tidy cleanup |
|
93 | - if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { |
|
94 | - $tidy = new Tidy; |
|
95 | - $tidy->parseString($html, array( |
|
96 | - 'indent'=> true, |
|
97 | - 'output-xhtml' => $this->_xhtml, |
|
98 | - 'show-body-only' => true, |
|
99 | - 'indent-spaces' => 2, |
|
100 | - 'wrap' => 68, |
|
101 | - ), 'utf8'); |
|
102 | - $tidy->cleanRepair(); |
|
103 | - $html = (string) $tidy; // explicit cast necessary |
|
104 | - } |
|
105 | - |
|
106 | - // Normalize newlines to system defined value |
|
107 | - if ($this->config->get('Core.NormalizeNewlines')) { |
|
108 | - $nl = $this->config->get('Output.Newline'); |
|
109 | - if ($nl === null) $nl = PHP_EOL; |
|
110 | - if ($nl !== "\n") $html = str_replace("\n", $nl, $html); |
|
111 | - } |
|
112 | - return $html; |
|
113 | - } |
|
114 | - |
|
115 | - /** |
|
116 | - * Generates HTML from a single token. |
|
117 | - * @param $token HTMLPurifier_Token object. |
|
118 | - * @return Generated HTML |
|
119 | - */ |
|
120 | - public function generateFromToken($token) { |
|
121 | - if (!$token instanceof HTMLPurifier_Token) { |
|
122 | - trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); |
|
123 | - return ''; |
|
124 | - |
|
125 | - } elseif ($token instanceof HTMLPurifier_Token_Start) { |
|
126 | - $attr = $this->generateAttributes($token->attr, $token->name); |
|
127 | - if ($this->_flashCompat) { |
|
128 | - if ($token->name == "object") { |
|
129 | - $flash = new stdclass(); |
|
130 | - $flash->attr = $token->attr; |
|
131 | - $flash->param = array(); |
|
132 | - $this->_flashStack[] = $flash; |
|
133 | - } |
|
134 | - } |
|
135 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; |
|
136 | - |
|
137 | - } elseif ($token instanceof HTMLPurifier_Token_End) { |
|
138 | - $_extra = ''; |
|
139 | - if ($this->_flashCompat) { |
|
140 | - if ($token->name == "object" && !empty($this->_flashStack)) { |
|
141 | - // doesn't do anything for now |
|
142 | - } |
|
143 | - } |
|
144 | - return $_extra . '</' . $token->name . '>'; |
|
145 | - |
|
146 | - } elseif ($token instanceof HTMLPurifier_Token_Empty) { |
|
147 | - if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { |
|
148 | - $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; |
|
149 | - } |
|
150 | - $attr = $this->generateAttributes($token->attr, $token->name); |
|
151 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . |
|
152 | - ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> |
|
153 | - . '>'; |
|
154 | - |
|
155 | - } elseif ($token instanceof HTMLPurifier_Token_Text) { |
|
156 | - return $this->escape($token->data, ENT_NOQUOTES); |
|
157 | - |
|
158 | - } elseif ($token instanceof HTMLPurifier_Token_Comment) { |
|
159 | - return '<!--' . $token->data . '-->'; |
|
160 | - } else { |
|
161 | - return ''; |
|
162 | - |
|
163 | - } |
|
164 | - } |
|
165 | - |
|
166 | - /** |
|
167 | - * Special case processor for the contents of script tags |
|
168 | - * @warning This runs into problems if there's already a literal |
|
169 | - * --> somewhere inside the script contents. |
|
170 | - */ |
|
171 | - public function generateScriptFromToken($token) { |
|
172 | - if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
|
173 | - // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
|
174 | - $data = preg_replace('#//\s*$#', '', $token->data); |
|
175 | - return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
|
176 | - } |
|
177 | - |
|
178 | - /** |
|
179 | - * Generates attribute declarations from attribute array. |
|
180 | - * @note This does not include the leading or trailing space. |
|
181 | - * @param $assoc_array_of_attributes Attribute array |
|
182 | - * @param $element Name of element attributes are for, used to check |
|
183 | - * attribute minimization. |
|
184 | - * @return Generate HTML fragment for insertion. |
|
185 | - */ |
|
186 | - public function generateAttributes($assoc_array_of_attributes, $element = false) { |
|
187 | - $html = ''; |
|
188 | - if ($this->_sortAttr) ksort($assoc_array_of_attributes); |
|
189 | - foreach ($assoc_array_of_attributes as $key => $value) { |
|
190 | - if (!$this->_xhtml) { |
|
191 | - // Remove namespaced attributes |
|
192 | - if (strpos($key, ':') !== false) continue; |
|
193 | - // Check if we should minimize the attribute: val="val" -> val |
|
194 | - if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
|
195 | - $html .= $key . ' '; |
|
196 | - continue; |
|
197 | - } |
|
198 | - } |
|
199 | - // Workaround for Internet Explorer innerHTML bug. |
|
200 | - // Essentially, Internet Explorer, when calculating |
|
201 | - // innerHTML, omits quotes if there are no instances of |
|
202 | - // angled brackets, quotes or spaces. However, when parsing |
|
203 | - // HTML (for example, when you assign to innerHTML), it |
|
204 | - // treats backticks as quotes. Thus, |
|
205 | - // <img alt="``" /> |
|
206 | - // becomes |
|
207 | - // <img alt=`` /> |
|
208 | - // becomes |
|
209 | - // <img alt='' /> |
|
210 | - // Fortunately, all we need to do is trigger an appropriate |
|
211 | - // quoting style, which we do by adding an extra space. |
|
212 | - // This also is consistent with the W3C spec, which states |
|
213 | - // that user agents may ignore leading or trailing |
|
214 | - // whitespace (in fact, most don't, at least for attributes |
|
215 | - // like alt, but an extra space at the end is barely |
|
216 | - // noticeable). Still, we have a configuration knob for |
|
217 | - // this, since this transformation is not necesary if you |
|
218 | - // don't process user input with innerHTML or you don't plan |
|
219 | - // on supporting Internet Explorer. |
|
220 | - if ($this->_innerHTMLFix) { |
|
221 | - if (strpos($value, '`') !== false) { |
|
222 | - // check if correct quoting style would not already be |
|
223 | - // triggered |
|
224 | - if (strcspn($value, '"\' <>') === strlen($value)) { |
|
225 | - // protect! |
|
226 | - $value .= ' '; |
|
227 | - } |
|
228 | - } |
|
229 | - } |
|
230 | - $html .= $key.'="'.$this->escape($value).'" '; |
|
231 | - } |
|
232 | - return rtrim($html); |
|
233 | - } |
|
234 | - |
|
235 | - /** |
|
236 | - * Escapes raw text data. |
|
237 | - * @todo This really ought to be protected, but until we have a facility |
|
238 | - * for properly generating HTML here w/o using tokens, it stays |
|
239 | - * public. |
|
240 | - * @param $string String data to escape for HTML. |
|
241 | - * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
242 | - * permissible for non-attribute output. |
|
243 | - * @return String escaped data. |
|
244 | - */ |
|
245 | - public function escape($string, $quote = null) { |
|
246 | - // Workaround for APC bug on Mac Leopard reported by sidepodcast |
|
247 | - // http://htmlpurifier.org/phorum/read.php?3,4823,4846 |
|
248 | - if ($quote === null) $quote = ENT_COMPAT; |
|
249 | - return htmlspecialchars($string, $quote, 'UTF-8', false); |
|
250 | - } |
|
13 | + /** |
|
14 | + * Whether or not generator should produce XML output |
|
15 | + */ |
|
16 | + private $_xhtml = true; |
|
17 | + |
|
18 | + /** |
|
19 | + * :HACK: Whether or not generator should comment the insides of <script> tags |
|
20 | + */ |
|
21 | + private $_scriptFix = false; |
|
22 | + |
|
23 | + /** |
|
24 | + * Cache of HTMLDefinition during HTML output to determine whether or |
|
25 | + * not attributes should be minimized. |
|
26 | + */ |
|
27 | + private $_def; |
|
28 | + |
|
29 | + /** |
|
30 | + * Cache of %Output.SortAttr |
|
31 | + */ |
|
32 | + private $_sortAttr; |
|
33 | + |
|
34 | + /** |
|
35 | + * Cache of %Output.FlashCompat |
|
36 | + */ |
|
37 | + private $_flashCompat; |
|
38 | + |
|
39 | + /** |
|
40 | + * Cache of %Output.FixInnerHTML |
|
41 | + */ |
|
42 | + private $_innerHTMLFix; |
|
43 | + |
|
44 | + /** |
|
45 | + * Stack for keeping track of object information when outputting IE |
|
46 | + * compatibility code. |
|
47 | + */ |
|
48 | + private $_flashStack = array(); |
|
49 | + |
|
50 | + /** |
|
51 | + * Configuration for the generator |
|
52 | + */ |
|
53 | + protected $config; |
|
54 | + |
|
55 | + /** |
|
56 | + * @param $config Instance of HTMLPurifier_Config |
|
57 | + * @param $context Instance of HTMLPurifier_Context |
|
58 | + */ |
|
59 | + public function __construct($config, $context) { |
|
60 | + $this->config = $config; |
|
61 | + $this->_scriptFix = $config->get('Output.CommentScriptContents'); |
|
62 | + $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); |
|
63 | + $this->_sortAttr = $config->get('Output.SortAttr'); |
|
64 | + $this->_flashCompat = $config->get('Output.FlashCompat'); |
|
65 | + $this->_def = $config->getHTMLDefinition(); |
|
66 | + $this->_xhtml = $this->_def->doctype->xml; |
|
67 | + } |
|
68 | + |
|
69 | + /** |
|
70 | + * Generates HTML from an array of tokens. |
|
71 | + * @param $tokens Array of HTMLPurifier_Token |
|
72 | + * @param $config HTMLPurifier_Config object |
|
73 | + * @return Generated HTML |
|
74 | + */ |
|
75 | + public function generateFromTokens($tokens) { |
|
76 | + if (!$tokens) return ''; |
|
77 | + |
|
78 | + // Basic algorithm |
|
79 | + $html = ''; |
|
80 | + for ($i = 0, $size = count($tokens); $i < $size; $i++) { |
|
81 | + if ($this->_scriptFix && $tokens[$i]->name === 'script' |
|
82 | + && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { |
|
83 | + // script special case |
|
84 | + // the contents of the script block must be ONE token |
|
85 | + // for this to work. |
|
86 | + $html .= $this->generateFromToken($tokens[$i++]); |
|
87 | + $html .= $this->generateScriptFromToken($tokens[$i++]); |
|
88 | + } |
|
89 | + $html .= $this->generateFromToken($tokens[$i]); |
|
90 | + } |
|
91 | + |
|
92 | + // Tidy cleanup |
|
93 | + if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { |
|
94 | + $tidy = new Tidy; |
|
95 | + $tidy->parseString($html, array( |
|
96 | + 'indent'=> true, |
|
97 | + 'output-xhtml' => $this->_xhtml, |
|
98 | + 'show-body-only' => true, |
|
99 | + 'indent-spaces' => 2, |
|
100 | + 'wrap' => 68, |
|
101 | + ), 'utf8'); |
|
102 | + $tidy->cleanRepair(); |
|
103 | + $html = (string) $tidy; // explicit cast necessary |
|
104 | + } |
|
105 | + |
|
106 | + // Normalize newlines to system defined value |
|
107 | + if ($this->config->get('Core.NormalizeNewlines')) { |
|
108 | + $nl = $this->config->get('Output.Newline'); |
|
109 | + if ($nl === null) $nl = PHP_EOL; |
|
110 | + if ($nl !== "\n") $html = str_replace("\n", $nl, $html); |
|
111 | + } |
|
112 | + return $html; |
|
113 | + } |
|
114 | + |
|
115 | + /** |
|
116 | + * Generates HTML from a single token. |
|
117 | + * @param $token HTMLPurifier_Token object. |
|
118 | + * @return Generated HTML |
|
119 | + */ |
|
120 | + public function generateFromToken($token) { |
|
121 | + if (!$token instanceof HTMLPurifier_Token) { |
|
122 | + trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); |
|
123 | + return ''; |
|
124 | + |
|
125 | + } elseif ($token instanceof HTMLPurifier_Token_Start) { |
|
126 | + $attr = $this->generateAttributes($token->attr, $token->name); |
|
127 | + if ($this->_flashCompat) { |
|
128 | + if ($token->name == "object") { |
|
129 | + $flash = new stdclass(); |
|
130 | + $flash->attr = $token->attr; |
|
131 | + $flash->param = array(); |
|
132 | + $this->_flashStack[] = $flash; |
|
133 | + } |
|
134 | + } |
|
135 | + return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; |
|
136 | + |
|
137 | + } elseif ($token instanceof HTMLPurifier_Token_End) { |
|
138 | + $_extra = ''; |
|
139 | + if ($this->_flashCompat) { |
|
140 | + if ($token->name == "object" && !empty($this->_flashStack)) { |
|
141 | + // doesn't do anything for now |
|
142 | + } |
|
143 | + } |
|
144 | + return $_extra . '</' . $token->name . '>'; |
|
145 | + |
|
146 | + } elseif ($token instanceof HTMLPurifier_Token_Empty) { |
|
147 | + if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { |
|
148 | + $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; |
|
149 | + } |
|
150 | + $attr = $this->generateAttributes($token->attr, $token->name); |
|
151 | + return '<' . $token->name . ($attr ? ' ' : '') . $attr . |
|
152 | + ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> |
|
153 | + . '>'; |
|
154 | + |
|
155 | + } elseif ($token instanceof HTMLPurifier_Token_Text) { |
|
156 | + return $this->escape($token->data, ENT_NOQUOTES); |
|
157 | + |
|
158 | + } elseif ($token instanceof HTMLPurifier_Token_Comment) { |
|
159 | + return '<!--' . $token->data . '-->'; |
|
160 | + } else { |
|
161 | + return ''; |
|
162 | + |
|
163 | + } |
|
164 | + } |
|
165 | + |
|
166 | + /** |
|
167 | + * Special case processor for the contents of script tags |
|
168 | + * @warning This runs into problems if there's already a literal |
|
169 | + * --> somewhere inside the script contents. |
|
170 | + */ |
|
171 | + public function generateScriptFromToken($token) { |
|
172 | + if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
|
173 | + // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
|
174 | + $data = preg_replace('#//\s*$#', '', $token->data); |
|
175 | + return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
|
176 | + } |
|
177 | + |
|
178 | + /** |
|
179 | + * Generates attribute declarations from attribute array. |
|
180 | + * @note This does not include the leading or trailing space. |
|
181 | + * @param $assoc_array_of_attributes Attribute array |
|
182 | + * @param $element Name of element attributes are for, used to check |
|
183 | + * attribute minimization. |
|
184 | + * @return Generate HTML fragment for insertion. |
|
185 | + */ |
|
186 | + public function generateAttributes($assoc_array_of_attributes, $element = false) { |
|
187 | + $html = ''; |
|
188 | + if ($this->_sortAttr) ksort($assoc_array_of_attributes); |
|
189 | + foreach ($assoc_array_of_attributes as $key => $value) { |
|
190 | + if (!$this->_xhtml) { |
|
191 | + // Remove namespaced attributes |
|
192 | + if (strpos($key, ':') !== false) continue; |
|
193 | + // Check if we should minimize the attribute: val="val" -> val |
|
194 | + if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
|
195 | + $html .= $key . ' '; |
|
196 | + continue; |
|
197 | + } |
|
198 | + } |
|
199 | + // Workaround for Internet Explorer innerHTML bug. |
|
200 | + // Essentially, Internet Explorer, when calculating |
|
201 | + // innerHTML, omits quotes if there are no instances of |
|
202 | + // angled brackets, quotes or spaces. However, when parsing |
|
203 | + // HTML (for example, when you assign to innerHTML), it |
|
204 | + // treats backticks as quotes. Thus, |
|
205 | + // <img alt="``" /> |
|
206 | + // becomes |
|
207 | + // <img alt=`` /> |
|
208 | + // becomes |
|
209 | + // <img alt='' /> |
|
210 | + // Fortunately, all we need to do is trigger an appropriate |
|
211 | + // quoting style, which we do by adding an extra space. |
|
212 | + // This also is consistent with the W3C spec, which states |
|
213 | + // that user agents may ignore leading or trailing |
|
214 | + // whitespace (in fact, most don't, at least for attributes |
|
215 | + // like alt, but an extra space at the end is barely |
|
216 | + // noticeable). Still, we have a configuration knob for |
|
217 | + // this, since this transformation is not necesary if you |
|
218 | + // don't process user input with innerHTML or you don't plan |
|
219 | + // on supporting Internet Explorer. |
|
220 | + if ($this->_innerHTMLFix) { |
|
221 | + if (strpos($value, '`') !== false) { |
|
222 | + // check if correct quoting style would not already be |
|
223 | + // triggered |
|
224 | + if (strcspn($value, '"\' <>') === strlen($value)) { |
|
225 | + // protect! |
|
226 | + $value .= ' '; |
|
227 | + } |
|
228 | + } |
|
229 | + } |
|
230 | + $html .= $key.'="'.$this->escape($value).'" '; |
|
231 | + } |
|
232 | + return rtrim($html); |
|
233 | + } |
|
234 | + |
|
235 | + /** |
|
236 | + * Escapes raw text data. |
|
237 | + * @todo This really ought to be protected, but until we have a facility |
|
238 | + * for properly generating HTML here w/o using tokens, it stays |
|
239 | + * public. |
|
240 | + * @param $string String data to escape for HTML. |
|
241 | + * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
242 | + * permissible for non-attribute output. |
|
243 | + * @return String escaped data. |
|
244 | + */ |
|
245 | + public function escape($string, $quote = null) { |
|
246 | + // Workaround for APC bug on Mac Leopard reported by sidepodcast |
|
247 | + // http://htmlpurifier.org/phorum/read.php?3,4823,4846 |
|
248 | + if ($quote === null) $quote = ENT_COMPAT; |
|
249 | + return htmlspecialchars($string, $quote, 'UTF-8', false); |
|
250 | + } |
|
251 | 251 | |
252 | 252 | } |
253 | 253 |
@@ -73,7 +73,9 @@ discard block |
||
73 | 73 | * @return Generated HTML |
74 | 74 | */ |
75 | 75 | public function generateFromTokens($tokens) { |
76 | - if (!$tokens) return ''; |
|
76 | + if (!$tokens) { |
|
77 | + return ''; |
|
78 | + } |
|
77 | 79 | |
78 | 80 | // Basic algorithm |
79 | 81 | $html = ''; |
@@ -106,8 +108,12 @@ discard block |
||
106 | 108 | // Normalize newlines to system defined value |
107 | 109 | if ($this->config->get('Core.NormalizeNewlines')) { |
108 | 110 | $nl = $this->config->get('Output.Newline'); |
109 | - if ($nl === null) $nl = PHP_EOL; |
|
110 | - if ($nl !== "\n") $html = str_replace("\n", $nl, $html); |
|
111 | + if ($nl === null) { |
|
112 | + $nl = PHP_EOL; |
|
113 | + } |
|
114 | + if ($nl !== "\n") { |
|
115 | + $html = str_replace("\n", $nl, $html); |
|
116 | + } |
|
111 | 117 | } |
112 | 118 | return $html; |
113 | 119 | } |
@@ -169,7 +175,9 @@ discard block |
||
169 | 175 | * --> somewhere inside the script contents. |
170 | 176 | */ |
171 | 177 | public function generateScriptFromToken($token) { |
172 | - if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
|
178 | + if (!$token instanceof HTMLPurifier_Token_Text) { |
|
179 | + return $this->generateFromToken($token); |
|
180 | + } |
|
173 | 181 | // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
174 | 182 | $data = preg_replace('#//\s*$#', '', $token->data); |
175 | 183 | return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
@@ -185,11 +193,15 @@ discard block |
||
185 | 193 | */ |
186 | 194 | public function generateAttributes($assoc_array_of_attributes, $element = false) { |
187 | 195 | $html = ''; |
188 | - if ($this->_sortAttr) ksort($assoc_array_of_attributes); |
|
196 | + if ($this->_sortAttr) { |
|
197 | + ksort($assoc_array_of_attributes); |
|
198 | + } |
|
189 | 199 | foreach ($assoc_array_of_attributes as $key => $value) { |
190 | 200 | if (!$this->_xhtml) { |
191 | 201 | // Remove namespaced attributes |
192 | - if (strpos($key, ':') !== false) continue; |
|
202 | + if (strpos($key, ':') !== false) { |
|
203 | + continue; |
|
204 | + } |
|
193 | 205 | // Check if we should minimize the attribute: val="val" -> val |
194 | 206 | if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
195 | 207 | $html .= $key . ' '; |
@@ -245,7 +257,9 @@ discard block |
||
245 | 257 | public function escape($string, $quote = null) { |
246 | 258 | // Workaround for APC bug on Mac Leopard reported by sidepodcast |
247 | 259 | // http://htmlpurifier.org/phorum/read.php?3,4823,4846 |
248 | - if ($quote === null) $quote = ENT_COMPAT; |
|
260 | + if ($quote === null) { |
|
261 | + $quote = ENT_COMPAT; |
|
262 | + } |
|
249 | 263 | return htmlspecialchars($string, $quote, 'UTF-8', false); |
250 | 264 | } |
251 | 265 |
@@ -79,7 +79,7 @@ discard block |
||
79 | 79 | $html = ''; |
80 | 80 | for ($i = 0, $size = count($tokens); $i < $size; $i++) { |
81 | 81 | if ($this->_scriptFix && $tokens[$i]->name === 'script' |
82 | - && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { |
|
82 | + && $i + 2 < $size && $tokens[$i + 2] instanceof HTMLPurifier_Token_End) { |
|
83 | 83 | // script special case |
84 | 84 | // the contents of the script block must be ONE token |
85 | 85 | // for this to work. |
@@ -132,7 +132,7 @@ discard block |
||
132 | 132 | $this->_flashStack[] = $flash; |
133 | 133 | } |
134 | 134 | } |
135 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; |
|
135 | + return '<'.$token->name.($attr ? ' ' : '').$attr.'>'; |
|
136 | 136 | |
137 | 137 | } elseif ($token instanceof HTMLPurifier_Token_End) { |
138 | 138 | $_extra = ''; |
@@ -141,22 +141,22 @@ discard block |
||
141 | 141 | // doesn't do anything for now |
142 | 142 | } |
143 | 143 | } |
144 | - return $_extra . '</' . $token->name . '>'; |
|
144 | + return $_extra.'</'.$token->name.'>'; |
|
145 | 145 | |
146 | 146 | } elseif ($token instanceof HTMLPurifier_Token_Empty) { |
147 | 147 | if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { |
148 | - $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; |
|
148 | + $this->_flashStack[count($this->_flashStack) - 1]->param[$token->attr['name']] = $token->attr['value']; |
|
149 | 149 | } |
150 | 150 | $attr = $this->generateAttributes($token->attr, $token->name); |
151 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . |
|
152 | - ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> |
|
151 | + return '<'.$token->name.($attr ? ' ' : '').$attr. |
|
152 | + ($this->_xhtml ? ' /' : '') // <br /> v. <br> |
|
153 | 153 | . '>'; |
154 | 154 | |
155 | 155 | } elseif ($token instanceof HTMLPurifier_Token_Text) { |
156 | 156 | return $this->escape($token->data, ENT_NOQUOTES); |
157 | 157 | |
158 | 158 | } elseif ($token instanceof HTMLPurifier_Token_Comment) { |
159 | - return '<!--' . $token->data . '-->'; |
|
159 | + return '<!--'.$token->data.'-->'; |
|
160 | 160 | } else { |
161 | 161 | return ''; |
162 | 162 | |
@@ -172,7 +172,7 @@ discard block |
||
172 | 172 | if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
173 | 173 | // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
174 | 174 | $data = preg_replace('#//\s*$#', '', $token->data); |
175 | - return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
|
175 | + return '<!--//--><![CDATA[//><!--'."\n".trim($data)."\n".'//--><!]]>'; |
|
176 | 176 | } |
177 | 177 | |
178 | 178 | /** |
@@ -192,7 +192,7 @@ discard block |
||
192 | 192 | if (strpos($key, ':') !== false) continue; |
193 | 193 | // Check if we should minimize the attribute: val="val" -> val |
194 | 194 | if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
195 | - $html .= $key . ' '; |
|
195 | + $html .= $key.' '; |
|
196 | 196 | continue; |
197 | 197 | } |
198 | 198 | } |
@@ -385,8 +385,7 @@ |
||
385 | 385 | * separate lists for processing. Format is element[attr1|attr2],element2... |
386 | 386 | * @warning Although it's largely drawn from TinyMCE's implementation, |
387 | 387 | * it is different, and you'll probably have to modify your lists |
388 | - * @param $list String list to parse |
|
389 | - * @param array($allowed_elements, $allowed_attributes) |
|
388 | + * @param string $list String list to parse |
|
390 | 389 | * @todo Give this its own class, probably static interface |
391 | 390 | */ |
392 | 391 | public function parseTinyMCEAllowedList($list) { |
@@ -26,398 +26,398 @@ |
||
26 | 26 | class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition |
27 | 27 | { |
28 | 28 | |
29 | - // FULLY-PUBLIC VARIABLES --------------------------------------------- |
|
30 | - |
|
31 | - /** |
|
32 | - * Associative array of element names to HTMLPurifier_ElementDef |
|
33 | - */ |
|
34 | - public $info = array(); |
|
35 | - |
|
36 | - /** |
|
37 | - * Associative array of global attribute name to attribute definition. |
|
38 | - */ |
|
39 | - public $info_global_attr = array(); |
|
40 | - |
|
41 | - /** |
|
42 | - * String name of parent element HTML will be going into. |
|
43 | - */ |
|
44 | - public $info_parent = 'div'; |
|
45 | - |
|
46 | - /** |
|
47 | - * Definition for parent element, allows parent element to be a |
|
48 | - * tag that's not allowed inside the HTML fragment. |
|
49 | - */ |
|
50 | - public $info_parent_def; |
|
51 | - |
|
52 | - /** |
|
53 | - * String name of element used to wrap inline elements in block context |
|
54 | - * @note This is rarely used except for BLOCKQUOTEs in strict mode |
|
55 | - */ |
|
56 | - public $info_block_wrapper = 'p'; |
|
57 | - |
|
58 | - /** |
|
59 | - * Associative array of deprecated tag name to HTMLPurifier_TagTransform |
|
60 | - */ |
|
61 | - public $info_tag_transform = array(); |
|
62 | - |
|
63 | - /** |
|
64 | - * Indexed list of HTMLPurifier_AttrTransform to be performed before validation. |
|
65 | - */ |
|
66 | - public $info_attr_transform_pre = array(); |
|
67 | - |
|
68 | - /** |
|
69 | - * Indexed list of HTMLPurifier_AttrTransform to be performed after validation. |
|
70 | - */ |
|
71 | - public $info_attr_transform_post = array(); |
|
72 | - |
|
73 | - /** |
|
74 | - * Nested lookup array of content set name (Block, Inline) to |
|
75 | - * element name to whether or not it belongs in that content set. |
|
76 | - */ |
|
77 | - public $info_content_sets = array(); |
|
78 | - |
|
79 | - /** |
|
80 | - * Indexed list of HTMLPurifier_Injector to be used. |
|
81 | - */ |
|
82 | - public $info_injector = array(); |
|
83 | - |
|
84 | - /** |
|
85 | - * Doctype object |
|
86 | - */ |
|
87 | - public $doctype; |
|
88 | - |
|
89 | - |
|
90 | - |
|
91 | - // RAW CUSTOMIZATION STUFF -------------------------------------------- |
|
92 | - |
|
93 | - /** |
|
94 | - * Adds a custom attribute to a pre-existing element |
|
95 | - * @note This is strictly convenience, and does not have a corresponding |
|
96 | - * method in HTMLPurifier_HTMLModule |
|
97 | - * @param $element_name String element name to add attribute to |
|
98 | - * @param $attr_name String name of attribute |
|
99 | - * @param $def Attribute definition, can be string or object, see |
|
100 | - * HTMLPurifier_AttrTypes for details |
|
101 | - */ |
|
102 | - public function addAttribute($element_name, $attr_name, $def) { |
|
103 | - $module = $this->getAnonymousModule(); |
|
104 | - if (!isset($module->info[$element_name])) { |
|
105 | - $element = $module->addBlankElement($element_name); |
|
106 | - } else { |
|
107 | - $element = $module->info[$element_name]; |
|
108 | - } |
|
109 | - $element->attr[$attr_name] = $def; |
|
110 | - } |
|
111 | - |
|
112 | - /** |
|
113 | - * Adds a custom element to your HTML definition |
|
114 | - * @note See HTMLPurifier_HTMLModule::addElement for detailed |
|
115 | - * parameter and return value descriptions. |
|
116 | - */ |
|
117 | - public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) { |
|
118 | - $module = $this->getAnonymousModule(); |
|
119 | - // assume that if the user is calling this, the element |
|
120 | - // is safe. This may not be a good idea |
|
121 | - $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes); |
|
122 | - return $element; |
|
123 | - } |
|
124 | - |
|
125 | - /** |
|
126 | - * Adds a blank element to your HTML definition, for overriding |
|
127 | - * existing behavior |
|
128 | - * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed |
|
129 | - * parameter and return value descriptions. |
|
130 | - */ |
|
131 | - public function addBlankElement($element_name) { |
|
132 | - $module = $this->getAnonymousModule(); |
|
133 | - $element = $module->addBlankElement($element_name); |
|
134 | - return $element; |
|
135 | - } |
|
136 | - |
|
137 | - /** |
|
138 | - * Retrieves a reference to the anonymous module, so you can |
|
139 | - * bust out advanced features without having to make your own |
|
140 | - * module. |
|
141 | - */ |
|
142 | - public function getAnonymousModule() { |
|
143 | - if (!$this->_anonModule) { |
|
144 | - $this->_anonModule = new HTMLPurifier_HTMLModule(); |
|
145 | - $this->_anonModule->name = 'Anonymous'; |
|
146 | - } |
|
147 | - return $this->_anonModule; |
|
148 | - } |
|
149 | - |
|
150 | - private $_anonModule = null; |
|
151 | - |
|
152 | - |
|
153 | - // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- |
|
154 | - |
|
155 | - public $type = 'HTML'; |
|
156 | - public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ |
|
157 | - |
|
158 | - /** |
|
159 | - * Performs low-cost, preliminary initialization. |
|
160 | - */ |
|
161 | - public function __construct() { |
|
162 | - $this->manager = new HTMLPurifier_HTMLModuleManager(); |
|
163 | - } |
|
164 | - |
|
165 | - protected function doSetup($config) { |
|
166 | - $this->processModules($config); |
|
167 | - $this->setupConfigStuff($config); |
|
168 | - unset($this->manager); |
|
169 | - |
|
170 | - // cleanup some of the element definitions |
|
171 | - foreach ($this->info as $k => $v) { |
|
172 | - unset($this->info[$k]->content_model); |
|
173 | - unset($this->info[$k]->content_model_type); |
|
174 | - } |
|
175 | - } |
|
176 | - |
|
177 | - /** |
|
178 | - * Extract out the information from the manager |
|
179 | - */ |
|
180 | - protected function processModules($config) { |
|
181 | - |
|
182 | - if ($this->_anonModule) { |
|
183 | - // for user specific changes |
|
184 | - // this is late-loaded so we don't have to deal with PHP4 |
|
185 | - // reference wonky-ness |
|
186 | - $this->manager->addModule($this->_anonModule); |
|
187 | - unset($this->_anonModule); |
|
188 | - } |
|
189 | - |
|
190 | - $this->manager->setup($config); |
|
191 | - $this->doctype = $this->manager->doctype; |
|
192 | - |
|
193 | - foreach ($this->manager->modules as $module) { |
|
194 | - foreach($module->info_tag_transform as $k => $v) { |
|
195 | - if ($v === false) unset($this->info_tag_transform[$k]); |
|
196 | - else $this->info_tag_transform[$k] = $v; |
|
197 | - } |
|
198 | - foreach($module->info_attr_transform_pre as $k => $v) { |
|
199 | - if ($v === false) unset($this->info_attr_transform_pre[$k]); |
|
200 | - else $this->info_attr_transform_pre[$k] = $v; |
|
201 | - } |
|
202 | - foreach($module->info_attr_transform_post as $k => $v) { |
|
203 | - if ($v === false) unset($this->info_attr_transform_post[$k]); |
|
204 | - else $this->info_attr_transform_post[$k] = $v; |
|
205 | - } |
|
206 | - foreach ($module->info_injector as $k => $v) { |
|
207 | - if ($v === false) unset($this->info_injector[$k]); |
|
208 | - else $this->info_injector[$k] = $v; |
|
209 | - } |
|
210 | - } |
|
211 | - |
|
212 | - $this->info = $this->manager->getElements(); |
|
213 | - $this->info_content_sets = $this->manager->contentSets->lookup; |
|
214 | - |
|
215 | - } |
|
216 | - |
|
217 | - /** |
|
218 | - * Sets up stuff based on config. We need a better way of doing this. |
|
219 | - */ |
|
220 | - protected function setupConfigStuff($config) { |
|
221 | - |
|
222 | - $block_wrapper = $config->get('HTML.BlockWrapper'); |
|
223 | - if (isset($this->info_content_sets['Block'][$block_wrapper])) { |
|
224 | - $this->info_block_wrapper = $block_wrapper; |
|
225 | - } else { |
|
226 | - trigger_error('Cannot use non-block element as block wrapper', |
|
227 | - E_USER_ERROR); |
|
228 | - } |
|
229 | - |
|
230 | - $parent = $config->get('HTML.Parent'); |
|
231 | - $def = $this->manager->getElement($parent, true); |
|
232 | - if ($def) { |
|
233 | - $this->info_parent = $parent; |
|
234 | - $this->info_parent_def = $def; |
|
235 | - } else { |
|
236 | - trigger_error('Cannot use unrecognized element as parent', |
|
237 | - E_USER_ERROR); |
|
238 | - $this->info_parent_def = $this->manager->getElement($this->info_parent, true); |
|
239 | - } |
|
240 | - |
|
241 | - // support template text |
|
242 | - $support = "(for information on implementing this, see the ". |
|
243 | - "support forums) "; |
|
244 | - |
|
245 | - // setup allowed elements ----------------------------------------- |
|
246 | - |
|
247 | - $allowed_elements = $config->get('HTML.AllowedElements'); |
|
248 | - $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early |
|
249 | - |
|
250 | - if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { |
|
251 | - $allowed = $config->get('HTML.Allowed'); |
|
252 | - if (is_string($allowed)) { |
|
253 | - list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); |
|
254 | - } |
|
255 | - } |
|
256 | - |
|
257 | - if (is_array($allowed_elements)) { |
|
258 | - foreach ($this->info as $name => $d) { |
|
259 | - if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
260 | - unset($allowed_elements[$name]); |
|
261 | - } |
|
262 | - // emit errors |
|
263 | - foreach ($allowed_elements as $element => $d) { |
|
264 | - $element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful! |
|
265 | - trigger_error("Element '$element' is not supported $support", E_USER_WARNING); |
|
266 | - } |
|
267 | - } |
|
268 | - |
|
269 | - // setup allowed attributes --------------------------------------- |
|
270 | - |
|
271 | - $allowed_attributes_mutable = $allowed_attributes; // by copy! |
|
272 | - if (is_array($allowed_attributes)) { |
|
273 | - |
|
274 | - // This actually doesn't do anything, since we went away from |
|
275 | - // global attributes. It's possible that userland code uses |
|
276 | - // it, but HTMLModuleManager doesn't! |
|
277 | - foreach ($this->info_global_attr as $attr => $x) { |
|
278 | - $keys = array($attr, "*@$attr", "*.$attr"); |
|
279 | - $delete = true; |
|
280 | - foreach ($keys as $key) { |
|
281 | - if ($delete && isset($allowed_attributes[$key])) { |
|
282 | - $delete = false; |
|
283 | - } |
|
284 | - if (isset($allowed_attributes_mutable[$key])) { |
|
285 | - unset($allowed_attributes_mutable[$key]); |
|
286 | - } |
|
287 | - } |
|
288 | - if ($delete) unset($this->info_global_attr[$attr]); |
|
289 | - } |
|
290 | - |
|
291 | - foreach ($this->info as $tag => $info) { |
|
292 | - foreach ($info->attr as $attr => $x) { |
|
293 | - $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); |
|
294 | - $delete = true; |
|
295 | - foreach ($keys as $key) { |
|
296 | - if ($delete && isset($allowed_attributes[$key])) { |
|
297 | - $delete = false; |
|
298 | - } |
|
299 | - if (isset($allowed_attributes_mutable[$key])) { |
|
300 | - unset($allowed_attributes_mutable[$key]); |
|
301 | - } |
|
302 | - } |
|
303 | - if ($delete) { |
|
304 | - if ($this->info[$tag]->attr[$attr]->required) { |
|
305 | - trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING); |
|
306 | - } |
|
307 | - unset($this->info[$tag]->attr[$attr]); |
|
308 | - } |
|
309 | - } |
|
310 | - } |
|
311 | - // emit errors |
|
312 | - foreach ($allowed_attributes_mutable as $elattr => $d) { |
|
313 | - $bits = preg_split('/[.@]/', $elattr, 2); |
|
314 | - $c = count($bits); |
|
315 | - switch ($c) { |
|
316 | - case 2: |
|
317 | - if ($bits[0] !== '*') { |
|
318 | - $element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
319 | - $attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
320 | - if (!isset($this->info[$element])) { |
|
321 | - trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support"); |
|
322 | - } else { |
|
323 | - trigger_error("Attribute '$attribute' in element '$element' not supported $support", |
|
324 | - E_USER_WARNING); |
|
325 | - } |
|
326 | - break; |
|
327 | - } |
|
328 | - // otherwise fall through |
|
329 | - case 1: |
|
330 | - $attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
331 | - trigger_error("Global attribute '$attribute' is not ". |
|
332 | - "supported in any elements $support", |
|
333 | - E_USER_WARNING); |
|
334 | - break; |
|
335 | - } |
|
336 | - } |
|
337 | - |
|
338 | - } |
|
339 | - |
|
340 | - // setup forbidden elements --------------------------------------- |
|
341 | - |
|
342 | - $forbidden_elements = $config->get('HTML.ForbiddenElements'); |
|
343 | - $forbidden_attributes = $config->get('HTML.ForbiddenAttributes'); |
|
344 | - |
|
345 | - foreach ($this->info as $tag => $info) { |
|
346 | - if (isset($forbidden_elements[$tag])) { |
|
347 | - unset($this->info[$tag]); |
|
348 | - continue; |
|
349 | - } |
|
350 | - foreach ($info->attr as $attr => $x) { |
|
351 | - if ( |
|
352 | - isset($forbidden_attributes["$tag@$attr"]) || |
|
353 | - isset($forbidden_attributes["*@$attr"]) || |
|
354 | - isset($forbidden_attributes[$attr]) |
|
355 | - ) { |
|
356 | - unset($this->info[$tag]->attr[$attr]); |
|
357 | - continue; |
|
358 | - } // this segment might get removed eventually |
|
359 | - elseif (isset($forbidden_attributes["$tag.$attr"])) { |
|
360 | - // $tag.$attr are not user supplied, so no worries! |
|
361 | - trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING); |
|
362 | - } |
|
363 | - } |
|
364 | - } |
|
365 | - foreach ($forbidden_attributes as $key => $v) { |
|
366 | - if (strlen($key) < 2) continue; |
|
367 | - if ($key[0] != '*') continue; |
|
368 | - if ($key[1] == '.') { |
|
369 | - trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); |
|
370 | - } |
|
371 | - } |
|
372 | - |
|
373 | - // setup injectors ----------------------------------------------------- |
|
374 | - foreach ($this->info_injector as $i => $injector) { |
|
375 | - if ($injector->checkNeeded($config) !== false) { |
|
376 | - // remove injector that does not have it's required |
|
377 | - // elements/attributes present, and is thus not needed. |
|
378 | - unset($this->info_injector[$i]); |
|
379 | - } |
|
380 | - } |
|
381 | - } |
|
382 | - |
|
383 | - /** |
|
384 | - * Parses a TinyMCE-flavored Allowed Elements and Attributes list into |
|
385 | - * separate lists for processing. Format is element[attr1|attr2],element2... |
|
386 | - * @warning Although it's largely drawn from TinyMCE's implementation, |
|
387 | - * it is different, and you'll probably have to modify your lists |
|
388 | - * @param $list String list to parse |
|
389 | - * @param array($allowed_elements, $allowed_attributes) |
|
390 | - * @todo Give this its own class, probably static interface |
|
391 | - */ |
|
392 | - public function parseTinyMCEAllowedList($list) { |
|
393 | - |
|
394 | - $list = str_replace(array(' ', "\t"), '', $list); |
|
395 | - |
|
396 | - $elements = array(); |
|
397 | - $attributes = array(); |
|
398 | - |
|
399 | - $chunks = preg_split('/(,|[\n\r]+)/', $list); |
|
400 | - foreach ($chunks as $chunk) { |
|
401 | - if (empty($chunk)) continue; |
|
402 | - // remove TinyMCE element control characters |
|
403 | - if (!strpos($chunk, '[')) { |
|
404 | - $element = $chunk; |
|
405 | - $attr = false; |
|
406 | - } else { |
|
407 | - list($element, $attr) = explode('[', $chunk); |
|
408 | - } |
|
409 | - if ($element !== '*') $elements[$element] = true; |
|
410 | - if (!$attr) continue; |
|
411 | - $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] |
|
412 | - $attr = explode('|', $attr); |
|
413 | - foreach ($attr as $key) { |
|
414 | - $attributes["$element.$key"] = true; |
|
415 | - } |
|
416 | - } |
|
417 | - |
|
418 | - return array($elements, $attributes); |
|
419 | - |
|
420 | - } |
|
29 | + // FULLY-PUBLIC VARIABLES --------------------------------------------- |
|
30 | + |
|
31 | + /** |
|
32 | + * Associative array of element names to HTMLPurifier_ElementDef |
|
33 | + */ |
|
34 | + public $info = array(); |
|
35 | + |
|
36 | + /** |
|
37 | + * Associative array of global attribute name to attribute definition. |
|
38 | + */ |
|
39 | + public $info_global_attr = array(); |
|
40 | + |
|
41 | + /** |
|
42 | + * String name of parent element HTML will be going into. |
|
43 | + */ |
|
44 | + public $info_parent = 'div'; |
|
45 | + |
|
46 | + /** |
|
47 | + * Definition for parent element, allows parent element to be a |
|
48 | + * tag that's not allowed inside the HTML fragment. |
|
49 | + */ |
|
50 | + public $info_parent_def; |
|
51 | + |
|
52 | + /** |
|
53 | + * String name of element used to wrap inline elements in block context |
|
54 | + * @note This is rarely used except for BLOCKQUOTEs in strict mode |
|
55 | + */ |
|
56 | + public $info_block_wrapper = 'p'; |
|
57 | + |
|
58 | + /** |
|
59 | + * Associative array of deprecated tag name to HTMLPurifier_TagTransform |
|
60 | + */ |
|
61 | + public $info_tag_transform = array(); |
|
62 | + |
|
63 | + /** |
|
64 | + * Indexed list of HTMLPurifier_AttrTransform to be performed before validation. |
|
65 | + */ |
|
66 | + public $info_attr_transform_pre = array(); |
|
67 | + |
|
68 | + /** |
|
69 | + * Indexed list of HTMLPurifier_AttrTransform to be performed after validation. |
|
70 | + */ |
|
71 | + public $info_attr_transform_post = array(); |
|
72 | + |
|
73 | + /** |
|
74 | + * Nested lookup array of content set name (Block, Inline) to |
|
75 | + * element name to whether or not it belongs in that content set. |
|
76 | + */ |
|
77 | + public $info_content_sets = array(); |
|
78 | + |
|
79 | + /** |
|
80 | + * Indexed list of HTMLPurifier_Injector to be used. |
|
81 | + */ |
|
82 | + public $info_injector = array(); |
|
83 | + |
|
84 | + /** |
|
85 | + * Doctype object |
|
86 | + */ |
|
87 | + public $doctype; |
|
88 | + |
|
89 | + |
|
90 | + |
|
91 | + // RAW CUSTOMIZATION STUFF -------------------------------------------- |
|
92 | + |
|
93 | + /** |
|
94 | + * Adds a custom attribute to a pre-existing element |
|
95 | + * @note This is strictly convenience, and does not have a corresponding |
|
96 | + * method in HTMLPurifier_HTMLModule |
|
97 | + * @param $element_name String element name to add attribute to |
|
98 | + * @param $attr_name String name of attribute |
|
99 | + * @param $def Attribute definition, can be string or object, see |
|
100 | + * HTMLPurifier_AttrTypes for details |
|
101 | + */ |
|
102 | + public function addAttribute($element_name, $attr_name, $def) { |
|
103 | + $module = $this->getAnonymousModule(); |
|
104 | + if (!isset($module->info[$element_name])) { |
|
105 | + $element = $module->addBlankElement($element_name); |
|
106 | + } else { |
|
107 | + $element = $module->info[$element_name]; |
|
108 | + } |
|
109 | + $element->attr[$attr_name] = $def; |
|
110 | + } |
|
111 | + |
|
112 | + /** |
|
113 | + * Adds a custom element to your HTML definition |
|
114 | + * @note See HTMLPurifier_HTMLModule::addElement for detailed |
|
115 | + * parameter and return value descriptions. |
|
116 | + */ |
|
117 | + public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) { |
|
118 | + $module = $this->getAnonymousModule(); |
|
119 | + // assume that if the user is calling this, the element |
|
120 | + // is safe. This may not be a good idea |
|
121 | + $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes); |
|
122 | + return $element; |
|
123 | + } |
|
124 | + |
|
125 | + /** |
|
126 | + * Adds a blank element to your HTML definition, for overriding |
|
127 | + * existing behavior |
|
128 | + * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed |
|
129 | + * parameter and return value descriptions. |
|
130 | + */ |
|
131 | + public function addBlankElement($element_name) { |
|
132 | + $module = $this->getAnonymousModule(); |
|
133 | + $element = $module->addBlankElement($element_name); |
|
134 | + return $element; |
|
135 | + } |
|
136 | + |
|
137 | + /** |
|
138 | + * Retrieves a reference to the anonymous module, so you can |
|
139 | + * bust out advanced features without having to make your own |
|
140 | + * module. |
|
141 | + */ |
|
142 | + public function getAnonymousModule() { |
|
143 | + if (!$this->_anonModule) { |
|
144 | + $this->_anonModule = new HTMLPurifier_HTMLModule(); |
|
145 | + $this->_anonModule->name = 'Anonymous'; |
|
146 | + } |
|
147 | + return $this->_anonModule; |
|
148 | + } |
|
149 | + |
|
150 | + private $_anonModule = null; |
|
151 | + |
|
152 | + |
|
153 | + // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- |
|
154 | + |
|
155 | + public $type = 'HTML'; |
|
156 | + public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ |
|
157 | + |
|
158 | + /** |
|
159 | + * Performs low-cost, preliminary initialization. |
|
160 | + */ |
|
161 | + public function __construct() { |
|
162 | + $this->manager = new HTMLPurifier_HTMLModuleManager(); |
|
163 | + } |
|
164 | + |
|
165 | + protected function doSetup($config) { |
|
166 | + $this->processModules($config); |
|
167 | + $this->setupConfigStuff($config); |
|
168 | + unset($this->manager); |
|
169 | + |
|
170 | + // cleanup some of the element definitions |
|
171 | + foreach ($this->info as $k => $v) { |
|
172 | + unset($this->info[$k]->content_model); |
|
173 | + unset($this->info[$k]->content_model_type); |
|
174 | + } |
|
175 | + } |
|
176 | + |
|
177 | + /** |
|
178 | + * Extract out the information from the manager |
|
179 | + */ |
|
180 | + protected function processModules($config) { |
|
181 | + |
|
182 | + if ($this->_anonModule) { |
|
183 | + // for user specific changes |
|
184 | + // this is late-loaded so we don't have to deal with PHP4 |
|
185 | + // reference wonky-ness |
|
186 | + $this->manager->addModule($this->_anonModule); |
|
187 | + unset($this->_anonModule); |
|
188 | + } |
|
189 | + |
|
190 | + $this->manager->setup($config); |
|
191 | + $this->doctype = $this->manager->doctype; |
|
192 | + |
|
193 | + foreach ($this->manager->modules as $module) { |
|
194 | + foreach($module->info_tag_transform as $k => $v) { |
|
195 | + if ($v === false) unset($this->info_tag_transform[$k]); |
|
196 | + else $this->info_tag_transform[$k] = $v; |
|
197 | + } |
|
198 | + foreach($module->info_attr_transform_pre as $k => $v) { |
|
199 | + if ($v === false) unset($this->info_attr_transform_pre[$k]); |
|
200 | + else $this->info_attr_transform_pre[$k] = $v; |
|
201 | + } |
|
202 | + foreach($module->info_attr_transform_post as $k => $v) { |
|
203 | + if ($v === false) unset($this->info_attr_transform_post[$k]); |
|
204 | + else $this->info_attr_transform_post[$k] = $v; |
|
205 | + } |
|
206 | + foreach ($module->info_injector as $k => $v) { |
|
207 | + if ($v === false) unset($this->info_injector[$k]); |
|
208 | + else $this->info_injector[$k] = $v; |
|
209 | + } |
|
210 | + } |
|
211 | + |
|
212 | + $this->info = $this->manager->getElements(); |
|
213 | + $this->info_content_sets = $this->manager->contentSets->lookup; |
|
214 | + |
|
215 | + } |
|
216 | + |
|
217 | + /** |
|
218 | + * Sets up stuff based on config. We need a better way of doing this. |
|
219 | + */ |
|
220 | + protected function setupConfigStuff($config) { |
|
221 | + |
|
222 | + $block_wrapper = $config->get('HTML.BlockWrapper'); |
|
223 | + if (isset($this->info_content_sets['Block'][$block_wrapper])) { |
|
224 | + $this->info_block_wrapper = $block_wrapper; |
|
225 | + } else { |
|
226 | + trigger_error('Cannot use non-block element as block wrapper', |
|
227 | + E_USER_ERROR); |
|
228 | + } |
|
229 | + |
|
230 | + $parent = $config->get('HTML.Parent'); |
|
231 | + $def = $this->manager->getElement($parent, true); |
|
232 | + if ($def) { |
|
233 | + $this->info_parent = $parent; |
|
234 | + $this->info_parent_def = $def; |
|
235 | + } else { |
|
236 | + trigger_error('Cannot use unrecognized element as parent', |
|
237 | + E_USER_ERROR); |
|
238 | + $this->info_parent_def = $this->manager->getElement($this->info_parent, true); |
|
239 | + } |
|
240 | + |
|
241 | + // support template text |
|
242 | + $support = "(for information on implementing this, see the ". |
|
243 | + "support forums) "; |
|
244 | + |
|
245 | + // setup allowed elements ----------------------------------------- |
|
246 | + |
|
247 | + $allowed_elements = $config->get('HTML.AllowedElements'); |
|
248 | + $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early |
|
249 | + |
|
250 | + if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { |
|
251 | + $allowed = $config->get('HTML.Allowed'); |
|
252 | + if (is_string($allowed)) { |
|
253 | + list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); |
|
254 | + } |
|
255 | + } |
|
256 | + |
|
257 | + if (is_array($allowed_elements)) { |
|
258 | + foreach ($this->info as $name => $d) { |
|
259 | + if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
260 | + unset($allowed_elements[$name]); |
|
261 | + } |
|
262 | + // emit errors |
|
263 | + foreach ($allowed_elements as $element => $d) { |
|
264 | + $element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful! |
|
265 | + trigger_error("Element '$element' is not supported $support", E_USER_WARNING); |
|
266 | + } |
|
267 | + } |
|
268 | + |
|
269 | + // setup allowed attributes --------------------------------------- |
|
270 | + |
|
271 | + $allowed_attributes_mutable = $allowed_attributes; // by copy! |
|
272 | + if (is_array($allowed_attributes)) { |
|
273 | + |
|
274 | + // This actually doesn't do anything, since we went away from |
|
275 | + // global attributes. It's possible that userland code uses |
|
276 | + // it, but HTMLModuleManager doesn't! |
|
277 | + foreach ($this->info_global_attr as $attr => $x) { |
|
278 | + $keys = array($attr, "*@$attr", "*.$attr"); |
|
279 | + $delete = true; |
|
280 | + foreach ($keys as $key) { |
|
281 | + if ($delete && isset($allowed_attributes[$key])) { |
|
282 | + $delete = false; |
|
283 | + } |
|
284 | + if (isset($allowed_attributes_mutable[$key])) { |
|
285 | + unset($allowed_attributes_mutable[$key]); |
|
286 | + } |
|
287 | + } |
|
288 | + if ($delete) unset($this->info_global_attr[$attr]); |
|
289 | + } |
|
290 | + |
|
291 | + foreach ($this->info as $tag => $info) { |
|
292 | + foreach ($info->attr as $attr => $x) { |
|
293 | + $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); |
|
294 | + $delete = true; |
|
295 | + foreach ($keys as $key) { |
|
296 | + if ($delete && isset($allowed_attributes[$key])) { |
|
297 | + $delete = false; |
|
298 | + } |
|
299 | + if (isset($allowed_attributes_mutable[$key])) { |
|
300 | + unset($allowed_attributes_mutable[$key]); |
|
301 | + } |
|
302 | + } |
|
303 | + if ($delete) { |
|
304 | + if ($this->info[$tag]->attr[$attr]->required) { |
|
305 | + trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING); |
|
306 | + } |
|
307 | + unset($this->info[$tag]->attr[$attr]); |
|
308 | + } |
|
309 | + } |
|
310 | + } |
|
311 | + // emit errors |
|
312 | + foreach ($allowed_attributes_mutable as $elattr => $d) { |
|
313 | + $bits = preg_split('/[.@]/', $elattr, 2); |
|
314 | + $c = count($bits); |
|
315 | + switch ($c) { |
|
316 | + case 2: |
|
317 | + if ($bits[0] !== '*') { |
|
318 | + $element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
319 | + $attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
320 | + if (!isset($this->info[$element])) { |
|
321 | + trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support"); |
|
322 | + } else { |
|
323 | + trigger_error("Attribute '$attribute' in element '$element' not supported $support", |
|
324 | + E_USER_WARNING); |
|
325 | + } |
|
326 | + break; |
|
327 | + } |
|
328 | + // otherwise fall through |
|
329 | + case 1: |
|
330 | + $attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false); |
|
331 | + trigger_error("Global attribute '$attribute' is not ". |
|
332 | + "supported in any elements $support", |
|
333 | + E_USER_WARNING); |
|
334 | + break; |
|
335 | + } |
|
336 | + } |
|
337 | + |
|
338 | + } |
|
339 | + |
|
340 | + // setup forbidden elements --------------------------------------- |
|
341 | + |
|
342 | + $forbidden_elements = $config->get('HTML.ForbiddenElements'); |
|
343 | + $forbidden_attributes = $config->get('HTML.ForbiddenAttributes'); |
|
344 | + |
|
345 | + foreach ($this->info as $tag => $info) { |
|
346 | + if (isset($forbidden_elements[$tag])) { |
|
347 | + unset($this->info[$tag]); |
|
348 | + continue; |
|
349 | + } |
|
350 | + foreach ($info->attr as $attr => $x) { |
|
351 | + if ( |
|
352 | + isset($forbidden_attributes["$tag@$attr"]) || |
|
353 | + isset($forbidden_attributes["*@$attr"]) || |
|
354 | + isset($forbidden_attributes[$attr]) |
|
355 | + ) { |
|
356 | + unset($this->info[$tag]->attr[$attr]); |
|
357 | + continue; |
|
358 | + } // this segment might get removed eventually |
|
359 | + elseif (isset($forbidden_attributes["$tag.$attr"])) { |
|
360 | + // $tag.$attr are not user supplied, so no worries! |
|
361 | + trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING); |
|
362 | + } |
|
363 | + } |
|
364 | + } |
|
365 | + foreach ($forbidden_attributes as $key => $v) { |
|
366 | + if (strlen($key) < 2) continue; |
|
367 | + if ($key[0] != '*') continue; |
|
368 | + if ($key[1] == '.') { |
|
369 | + trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); |
|
370 | + } |
|
371 | + } |
|
372 | + |
|
373 | + // setup injectors ----------------------------------------------------- |
|
374 | + foreach ($this->info_injector as $i => $injector) { |
|
375 | + if ($injector->checkNeeded($config) !== false) { |
|
376 | + // remove injector that does not have it's required |
|
377 | + // elements/attributes present, and is thus not needed. |
|
378 | + unset($this->info_injector[$i]); |
|
379 | + } |
|
380 | + } |
|
381 | + } |
|
382 | + |
|
383 | + /** |
|
384 | + * Parses a TinyMCE-flavored Allowed Elements and Attributes list into |
|
385 | + * separate lists for processing. Format is element[attr1|attr2],element2... |
|
386 | + * @warning Although it's largely drawn from TinyMCE's implementation, |
|
387 | + * it is different, and you'll probably have to modify your lists |
|
388 | + * @param $list String list to parse |
|
389 | + * @param array($allowed_elements, $allowed_attributes) |
|
390 | + * @todo Give this its own class, probably static interface |
|
391 | + */ |
|
392 | + public function parseTinyMCEAllowedList($list) { |
|
393 | + |
|
394 | + $list = str_replace(array(' ', "\t"), '', $list); |
|
395 | + |
|
396 | + $elements = array(); |
|
397 | + $attributes = array(); |
|
398 | + |
|
399 | + $chunks = preg_split('/(,|[\n\r]+)/', $list); |
|
400 | + foreach ($chunks as $chunk) { |
|
401 | + if (empty($chunk)) continue; |
|
402 | + // remove TinyMCE element control characters |
|
403 | + if (!strpos($chunk, '[')) { |
|
404 | + $element = $chunk; |
|
405 | + $attr = false; |
|
406 | + } else { |
|
407 | + list($element, $attr) = explode('[', $chunk); |
|
408 | + } |
|
409 | + if ($element !== '*') $elements[$element] = true; |
|
410 | + if (!$attr) continue; |
|
411 | + $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] |
|
412 | + $attr = explode('|', $attr); |
|
413 | + foreach ($attr as $key) { |
|
414 | + $attributes["$element.$key"] = true; |
|
415 | + } |
|
416 | + } |
|
417 | + |
|
418 | + return array($elements, $attributes); |
|
419 | + |
|
420 | + } |
|
421 | 421 | |
422 | 422 | |
423 | 423 | } |
@@ -192,20 +192,32 @@ discard block |
||
192 | 192 | |
193 | 193 | foreach ($this->manager->modules as $module) { |
194 | 194 | foreach($module->info_tag_transform as $k => $v) { |
195 | - if ($v === false) unset($this->info_tag_transform[$k]); |
|
196 | - else $this->info_tag_transform[$k] = $v; |
|
195 | + if ($v === false) { |
|
196 | + unset($this->info_tag_transform[$k]); |
|
197 | + } else { |
|
198 | + $this->info_tag_transform[$k] = $v; |
|
199 | + } |
|
197 | 200 | } |
198 | 201 | foreach($module->info_attr_transform_pre as $k => $v) { |
199 | - if ($v === false) unset($this->info_attr_transform_pre[$k]); |
|
200 | - else $this->info_attr_transform_pre[$k] = $v; |
|
202 | + if ($v === false) { |
|
203 | + unset($this->info_attr_transform_pre[$k]); |
|
204 | + } else { |
|
205 | + $this->info_attr_transform_pre[$k] = $v; |
|
206 | + } |
|
201 | 207 | } |
202 | 208 | foreach($module->info_attr_transform_post as $k => $v) { |
203 | - if ($v === false) unset($this->info_attr_transform_post[$k]); |
|
204 | - else $this->info_attr_transform_post[$k] = $v; |
|
209 | + if ($v === false) { |
|
210 | + unset($this->info_attr_transform_post[$k]); |
|
211 | + } else { |
|
212 | + $this->info_attr_transform_post[$k] = $v; |
|
213 | + } |
|
205 | 214 | } |
206 | 215 | foreach ($module->info_injector as $k => $v) { |
207 | - if ($v === false) unset($this->info_injector[$k]); |
|
208 | - else $this->info_injector[$k] = $v; |
|
216 | + if ($v === false) { |
|
217 | + unset($this->info_injector[$k]); |
|
218 | + } else { |
|
219 | + $this->info_injector[$k] = $v; |
|
220 | + } |
|
209 | 221 | } |
210 | 222 | } |
211 | 223 | |
@@ -256,7 +268,9 @@ discard block |
||
256 | 268 | |
257 | 269 | if (is_array($allowed_elements)) { |
258 | 270 | foreach ($this->info as $name => $d) { |
259 | - if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
271 | + if(!isset($allowed_elements[$name])) { |
|
272 | + unset($this->info[$name]); |
|
273 | + } |
|
260 | 274 | unset($allowed_elements[$name]); |
261 | 275 | } |
262 | 276 | // emit errors |
@@ -285,7 +299,9 @@ discard block |
||
285 | 299 | unset($allowed_attributes_mutable[$key]); |
286 | 300 | } |
287 | 301 | } |
288 | - if ($delete) unset($this->info_global_attr[$attr]); |
|
302 | + if ($delete) { |
|
303 | + unset($this->info_global_attr[$attr]); |
|
304 | + } |
|
289 | 305 | } |
290 | 306 | |
291 | 307 | foreach ($this->info as $tag => $info) { |
@@ -363,8 +379,12 @@ discard block |
||
363 | 379 | } |
364 | 380 | } |
365 | 381 | foreach ($forbidden_attributes as $key => $v) { |
366 | - if (strlen($key) < 2) continue; |
|
367 | - if ($key[0] != '*') continue; |
|
382 | + if (strlen($key) < 2) { |
|
383 | + continue; |
|
384 | + } |
|
385 | + if ($key[0] != '*') { |
|
386 | + continue; |
|
387 | + } |
|
368 | 388 | if ($key[1] == '.') { |
369 | 389 | trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); |
370 | 390 | } |
@@ -398,7 +418,9 @@ discard block |
||
398 | 418 | |
399 | 419 | $chunks = preg_split('/(,|[\n\r]+)/', $list); |
400 | 420 | foreach ($chunks as $chunk) { |
401 | - if (empty($chunk)) continue; |
|
421 | + if (empty($chunk)) { |
|
422 | + continue; |
|
423 | + } |
|
402 | 424 | // remove TinyMCE element control characters |
403 | 425 | if (!strpos($chunk, '[')) { |
404 | 426 | $element = $chunk; |
@@ -406,8 +428,12 @@ discard block |
||
406 | 428 | } else { |
407 | 429 | list($element, $attr) = explode('[', $chunk); |
408 | 430 | } |
409 | - if ($element !== '*') $elements[$element] = true; |
|
410 | - if (!$attr) continue; |
|
431 | + if ($element !== '*') { |
|
432 | + $elements[$element] = true; |
|
433 | + } |
|
434 | + if (!$attr) { |
|
435 | + continue; |
|
436 | + } |
|
411 | 437 | $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] |
412 | 438 | $attr = explode('|', $attr); |
413 | 439 | foreach ($attr as $key) { |
@@ -191,15 +191,15 @@ discard block |
||
191 | 191 | $this->doctype = $this->manager->doctype; |
192 | 192 | |
193 | 193 | foreach ($this->manager->modules as $module) { |
194 | - foreach($module->info_tag_transform as $k => $v) { |
|
194 | + foreach ($module->info_tag_transform as $k => $v) { |
|
195 | 195 | if ($v === false) unset($this->info_tag_transform[$k]); |
196 | 196 | else $this->info_tag_transform[$k] = $v; |
197 | 197 | } |
198 | - foreach($module->info_attr_transform_pre as $k => $v) { |
|
198 | + foreach ($module->info_attr_transform_pre as $k => $v) { |
|
199 | 199 | if ($v === false) unset($this->info_attr_transform_pre[$k]); |
200 | 200 | else $this->info_attr_transform_pre[$k] = $v; |
201 | 201 | } |
202 | - foreach($module->info_attr_transform_post as $k => $v) { |
|
202 | + foreach ($module->info_attr_transform_post as $k => $v) { |
|
203 | 203 | if ($v === false) unset($this->info_attr_transform_post[$k]); |
204 | 204 | else $this->info_attr_transform_post[$k] = $v; |
205 | 205 | } |
@@ -256,7 +256,7 @@ discard block |
||
256 | 256 | |
257 | 257 | if (is_array($allowed_elements)) { |
258 | 258 | foreach ($this->info as $name => $d) { |
259 | - if(!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
259 | + if (!isset($allowed_elements[$name])) unset($this->info[$name]); |
|
260 | 260 | unset($allowed_elements[$name]); |
261 | 261 | } |
262 | 262 | // emit errors |
@@ -179,6 +179,7 @@ discard block |
||
179 | 179 | /** |
180 | 180 | * Adds a module to the current doctype by first registering it, |
181 | 181 | * and then tacking it on to the active doctype |
182 | + * @param HTMLPurifier_HTMLModule $module |
|
182 | 183 | */ |
183 | 184 | public function addModule($module) { |
184 | 185 | $this->registerModule($module); |
@@ -325,7 +326,7 @@ discard block |
||
325 | 326 | /** |
326 | 327 | * Retrieves a single merged element definition |
327 | 328 | * @param $name Name of element |
328 | - * @param $trusted Boolean trusted overriding parameter: set to true |
|
329 | + * @param boolean $trusted Boolean trusted overriding parameter: set to true |
|
329 | 330 | * if you want the full version of an element |
330 | 331 | * @return Merged HTMLPurifier_ElementDef |
331 | 332 | * @note You may notice that modules are getting iterated over twice (once |
@@ -3,412 +3,412 @@ |
||
3 | 3 | class HTMLPurifier_HTMLModuleManager |
4 | 4 | { |
5 | 5 | |
6 | - /** |
|
7 | - * Instance of HTMLPurifier_DoctypeRegistry |
|
8 | - */ |
|
9 | - public $doctypes; |
|
10 | - |
|
11 | - /** |
|
12 | - * Instance of current doctype |
|
13 | - */ |
|
14 | - public $doctype; |
|
15 | - |
|
16 | - /** |
|
17 | - * Instance of HTMLPurifier_AttrTypes |
|
18 | - */ |
|
19 | - public $attrTypes; |
|
20 | - |
|
21 | - /** |
|
22 | - * Active instances of modules for the specified doctype are |
|
23 | - * indexed, by name, in this array. |
|
24 | - */ |
|
25 | - public $modules = array(); |
|
26 | - |
|
27 | - /** |
|
28 | - * Array of recognized HTMLPurifier_Module instances, indexed by |
|
29 | - * module's class name. This array is usually lazy loaded, but a |
|
30 | - * user can overload a module by pre-emptively registering it. |
|
31 | - */ |
|
32 | - public $registeredModules = array(); |
|
33 | - |
|
34 | - /** |
|
35 | - * List of extra modules that were added by the user using addModule(). |
|
36 | - * These get unconditionally merged into the current doctype, whatever |
|
37 | - * it may be. |
|
38 | - */ |
|
39 | - public $userModules = array(); |
|
40 | - |
|
41 | - /** |
|
42 | - * Associative array of element name to list of modules that have |
|
43 | - * definitions for the element; this array is dynamically filled. |
|
44 | - */ |
|
45 | - public $elementLookup = array(); |
|
46 | - |
|
47 | - /** List of prefixes we should use for registering small names */ |
|
48 | - public $prefixes = array('HTMLPurifier_HTMLModule_'); |
|
49 | - |
|
50 | - public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
51 | - public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ |
|
52 | - |
|
53 | - /** If set to true, unsafe elements and attributes will be allowed */ |
|
54 | - public $trusted = false; |
|
55 | - |
|
56 | - public function __construct() { |
|
57 | - |
|
58 | - // editable internal objects |
|
59 | - $this->attrTypes = new HTMLPurifier_AttrTypes(); |
|
60 | - $this->doctypes = new HTMLPurifier_DoctypeRegistry(); |
|
61 | - |
|
62 | - // setup basic modules |
|
63 | - $common = array( |
|
64 | - 'CommonAttributes', 'Text', 'Hypertext', 'List', |
|
65 | - 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', |
|
66 | - 'StyleAttribute', |
|
67 | - // Unsafe: |
|
68 | - 'Scripting', 'Object', 'Forms', |
|
69 | - // Sorta legacy, but present in strict: |
|
70 | - 'Name', |
|
71 | - ); |
|
72 | - $transitional = array('Legacy', 'Target', 'Iframe'); |
|
73 | - $xml = array('XMLCommonAttributes'); |
|
74 | - $non_xml = array('NonXMLCommonAttributes'); |
|
75 | - |
|
76 | - // setup basic doctypes |
|
77 | - $this->doctypes->register( |
|
78 | - 'HTML 4.01 Transitional', false, |
|
79 | - array_merge($common, $transitional, $non_xml), |
|
80 | - array('Tidy_Transitional', 'Tidy_Proprietary'), |
|
81 | - array(), |
|
82 | - '-//W3C//DTD HTML 4.01 Transitional//EN', |
|
83 | - 'http://www.w3.org/TR/html4/loose.dtd' |
|
84 | - ); |
|
85 | - |
|
86 | - $this->doctypes->register( |
|
87 | - 'HTML 4.01 Strict', false, |
|
88 | - array_merge($common, $non_xml), |
|
89 | - array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
90 | - array(), |
|
91 | - '-//W3C//DTD HTML 4.01//EN', |
|
92 | - 'http://www.w3.org/TR/html4/strict.dtd' |
|
93 | - ); |
|
94 | - |
|
95 | - $this->doctypes->register( |
|
96 | - 'XHTML 1.0 Transitional', true, |
|
97 | - array_merge($common, $transitional, $xml, $non_xml), |
|
98 | - array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), |
|
99 | - array(), |
|
100 | - '-//W3C//DTD XHTML 1.0 Transitional//EN', |
|
101 | - 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' |
|
102 | - ); |
|
103 | - |
|
104 | - $this->doctypes->register( |
|
105 | - 'XHTML 1.0 Strict', true, |
|
106 | - array_merge($common, $xml, $non_xml), |
|
107 | - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
108 | - array(), |
|
109 | - '-//W3C//DTD XHTML 1.0 Strict//EN', |
|
110 | - 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' |
|
111 | - ); |
|
112 | - |
|
113 | - $this->doctypes->register( |
|
114 | - 'XHTML 1.1', true, |
|
115 | - // Iframe is a real XHTML 1.1 module, despite being |
|
116 | - // "transitional"! |
|
117 | - array_merge($common, $xml, array('Ruby', 'Iframe')), |
|
118 | - array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 |
|
119 | - array(), |
|
120 | - '-//W3C//DTD XHTML 1.1//EN', |
|
121 | - 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' |
|
122 | - ); |
|
123 | - |
|
124 | - } |
|
125 | - |
|
126 | - /** |
|
127 | - * Registers a module to the recognized module list, useful for |
|
128 | - * overloading pre-existing modules. |
|
129 | - * @param $module Mixed: string module name, with or without |
|
130 | - * HTMLPurifier_HTMLModule prefix, or instance of |
|
131 | - * subclass of HTMLPurifier_HTMLModule. |
|
132 | - * @param $overload Boolean whether or not to overload previous modules. |
|
133 | - * If this is not set, and you do overload a module, |
|
134 | - * HTML Purifier will complain with a warning. |
|
135 | - * @note This function will not call autoload, you must instantiate |
|
136 | - * (and thus invoke) autoload outside the method. |
|
137 | - * @note If a string is passed as a module name, different variants |
|
138 | - * will be tested in this order: |
|
139 | - * - Check for HTMLPurifier_HTMLModule_$name |
|
140 | - * - Check all prefixes with $name in order they were added |
|
141 | - * - Check for literal object name |
|
142 | - * - Throw fatal error |
|
143 | - * If your object name collides with an internal class, specify |
|
144 | - * your module manually. All modules must have been included |
|
145 | - * externally: registerModule will not perform inclusions for you! |
|
146 | - */ |
|
147 | - public function registerModule($module, $overload = false) { |
|
148 | - if (is_string($module)) { |
|
149 | - // attempt to load the module |
|
150 | - $original_module = $module; |
|
151 | - $ok = false; |
|
152 | - foreach ($this->prefixes as $prefix) { |
|
153 | - $module = $prefix . $original_module; |
|
154 | - if (class_exists($module)) { |
|
155 | - $ok = true; |
|
156 | - break; |
|
157 | - } |
|
158 | - } |
|
159 | - if (!$ok) { |
|
160 | - $module = $original_module; |
|
161 | - if (!class_exists($module)) { |
|
162 | - trigger_error($original_module . ' module does not exist', |
|
163 | - E_USER_ERROR); |
|
164 | - return; |
|
165 | - } |
|
166 | - } |
|
167 | - $module = new $module(); |
|
168 | - } |
|
169 | - if (empty($module->name)) { |
|
170 | - trigger_error('Module instance of ' . get_class($module) . ' must have name'); |
|
171 | - return; |
|
172 | - } |
|
173 | - if (!$overload && isset($this->registeredModules[$module->name])) { |
|
174 | - trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); |
|
175 | - } |
|
176 | - $this->registeredModules[$module->name] = $module; |
|
177 | - } |
|
178 | - |
|
179 | - /** |
|
180 | - * Adds a module to the current doctype by first registering it, |
|
181 | - * and then tacking it on to the active doctype |
|
182 | - */ |
|
183 | - public function addModule($module) { |
|
184 | - $this->registerModule($module); |
|
185 | - if (is_object($module)) $module = $module->name; |
|
186 | - $this->userModules[] = $module; |
|
187 | - } |
|
188 | - |
|
189 | - /** |
|
190 | - * Adds a class prefix that registerModule() will use to resolve a |
|
191 | - * string name to a concrete class |
|
192 | - */ |
|
193 | - public function addPrefix($prefix) { |
|
194 | - $this->prefixes[] = $prefix; |
|
195 | - } |
|
196 | - |
|
197 | - /** |
|
198 | - * Performs processing on modules, after being called you may |
|
199 | - * use getElement() and getElements() |
|
200 | - * @param $config Instance of HTMLPurifier_Config |
|
201 | - */ |
|
202 | - public function setup($config) { |
|
203 | - |
|
204 | - $this->trusted = $config->get('HTML.Trusted'); |
|
205 | - |
|
206 | - // generate |
|
207 | - $this->doctype = $this->doctypes->make($config); |
|
208 | - $modules = $this->doctype->modules; |
|
209 | - |
|
210 | - // take out the default modules that aren't allowed |
|
211 | - $lookup = $config->get('HTML.AllowedModules'); |
|
212 | - $special_cases = $config->get('HTML.CoreModules'); |
|
213 | - |
|
214 | - if (is_array($lookup)) { |
|
215 | - foreach ($modules as $k => $m) { |
|
216 | - if (isset($special_cases[$m])) continue; |
|
217 | - if (!isset($lookup[$m])) unset($modules[$k]); |
|
218 | - } |
|
219 | - } |
|
220 | - |
|
221 | - // custom modules |
|
222 | - if ($config->get('HTML.Proprietary')) { |
|
223 | - $modules[] = 'Proprietary'; |
|
224 | - } |
|
225 | - if ($config->get('HTML.SafeObject')) { |
|
226 | - $modules[] = 'SafeObject'; |
|
227 | - } |
|
228 | - if ($config->get('HTML.SafeEmbed')) { |
|
229 | - $modules[] = 'SafeEmbed'; |
|
230 | - } |
|
231 | - if ($config->get('HTML.Nofollow')) { |
|
232 | - $modules[] = 'Nofollow'; |
|
233 | - } |
|
234 | - if ($config->get('HTML.TargetBlank')) { |
|
235 | - $modules[] = 'TargetBlank'; |
|
236 | - } |
|
237 | - |
|
238 | - // merge in custom modules |
|
239 | - $modules = array_merge($modules, $this->userModules); |
|
240 | - |
|
241 | - foreach ($modules as $module) { |
|
242 | - $this->processModule($module); |
|
243 | - $this->modules[$module]->setup($config); |
|
244 | - } |
|
245 | - |
|
246 | - foreach ($this->doctype->tidyModules as $module) { |
|
247 | - $this->processModule($module); |
|
248 | - $this->modules[$module]->setup($config); |
|
249 | - } |
|
250 | - |
|
251 | - // prepare any injectors |
|
252 | - foreach ($this->modules as $module) { |
|
253 | - $n = array(); |
|
254 | - foreach ($module->info_injector as $i => $injector) { |
|
255 | - if (!is_object($injector)) { |
|
256 | - $class = "HTMLPurifier_Injector_$injector"; |
|
257 | - $injector = new $class; |
|
258 | - } |
|
259 | - $n[$injector->name] = $injector; |
|
260 | - } |
|
261 | - $module->info_injector = $n; |
|
262 | - } |
|
263 | - |
|
264 | - // setup lookup table based on all valid modules |
|
265 | - foreach ($this->modules as $module) { |
|
266 | - foreach ($module->info as $name => $def) { |
|
267 | - if (!isset($this->elementLookup[$name])) { |
|
268 | - $this->elementLookup[$name] = array(); |
|
269 | - } |
|
270 | - $this->elementLookup[$name][] = $module->name; |
|
271 | - } |
|
272 | - } |
|
273 | - |
|
274 | - // note the different choice |
|
275 | - $this->contentSets = new HTMLPurifier_ContentSets( |
|
276 | - // content set assembly deals with all possible modules, |
|
277 | - // not just ones deemed to be "safe" |
|
278 | - $this->modules |
|
279 | - ); |
|
280 | - $this->attrCollections = new HTMLPurifier_AttrCollections( |
|
281 | - $this->attrTypes, |
|
282 | - // there is no way to directly disable a global attribute, |
|
283 | - // but using AllowedAttributes or simply not including |
|
284 | - // the module in your custom doctype should be sufficient |
|
285 | - $this->modules |
|
286 | - ); |
|
287 | - } |
|
288 | - |
|
289 | - /** |
|
290 | - * Takes a module and adds it to the active module collection, |
|
291 | - * registering it if necessary. |
|
292 | - */ |
|
293 | - public function processModule($module) { |
|
294 | - if (!isset($this->registeredModules[$module]) || is_object($module)) { |
|
295 | - $this->registerModule($module); |
|
296 | - } |
|
297 | - $this->modules[$module] = $this->registeredModules[$module]; |
|
298 | - } |
|
299 | - |
|
300 | - /** |
|
301 | - * Retrieves merged element definitions. |
|
302 | - * @return Array of HTMLPurifier_ElementDef |
|
303 | - */ |
|
304 | - public function getElements() { |
|
305 | - |
|
306 | - $elements = array(); |
|
307 | - foreach ($this->modules as $module) { |
|
308 | - if (!$this->trusted && !$module->safe) continue; |
|
309 | - foreach ($module->info as $name => $v) { |
|
310 | - if (isset($elements[$name])) continue; |
|
311 | - $elements[$name] = $this->getElement($name); |
|
312 | - } |
|
313 | - } |
|
314 | - |
|
315 | - // remove dud elements, this happens when an element that |
|
316 | - // appeared to be safe actually wasn't |
|
317 | - foreach ($elements as $n => $v) { |
|
318 | - if ($v === false) unset($elements[$n]); |
|
319 | - } |
|
320 | - |
|
321 | - return $elements; |
|
322 | - |
|
323 | - } |
|
324 | - |
|
325 | - /** |
|
326 | - * Retrieves a single merged element definition |
|
327 | - * @param $name Name of element |
|
328 | - * @param $trusted Boolean trusted overriding parameter: set to true |
|
329 | - * if you want the full version of an element |
|
330 | - * @return Merged HTMLPurifier_ElementDef |
|
331 | - * @note You may notice that modules are getting iterated over twice (once |
|
332 | - * in getElements() and once here). This |
|
333 | - * is because |
|
334 | - */ |
|
335 | - public function getElement($name, $trusted = null) { |
|
336 | - |
|
337 | - if (!isset($this->elementLookup[$name])) { |
|
338 | - return false; |
|
339 | - } |
|
340 | - |
|
341 | - // setup global state variables |
|
342 | - $def = false; |
|
343 | - if ($trusted === null) $trusted = $this->trusted; |
|
344 | - |
|
345 | - // iterate through each module that has registered itself to this |
|
346 | - // element |
|
347 | - foreach($this->elementLookup[$name] as $module_name) { |
|
348 | - |
|
349 | - $module = $this->modules[$module_name]; |
|
350 | - |
|
351 | - // refuse to create/merge from a module that is deemed unsafe-- |
|
352 | - // pretend the module doesn't exist--when trusted mode is not on. |
|
353 | - if (!$trusted && !$module->safe) { |
|
354 | - continue; |
|
355 | - } |
|
356 | - |
|
357 | - // clone is used because, ideally speaking, the original |
|
358 | - // definition should not be modified. Usually, this will |
|
359 | - // make no difference, but for consistency's sake |
|
360 | - $new_def = clone $module->info[$name]; |
|
361 | - |
|
362 | - if (!$def && $new_def->standalone) { |
|
363 | - $def = $new_def; |
|
364 | - } elseif ($def) { |
|
365 | - // This will occur even if $new_def is standalone. In practice, |
|
366 | - // this will usually result in a full replacement. |
|
367 | - $def->mergeIn($new_def); |
|
368 | - } else { |
|
369 | - // :TODO: |
|
370 | - // non-standalone definitions that don't have a standalone |
|
371 | - // to merge into could be deferred to the end |
|
372 | - // HOWEVER, it is perfectly valid for a non-standalone |
|
373 | - // definition to lack a standalone definition, even |
|
374 | - // after all processing: this allows us to safely |
|
375 | - // specify extra attributes for elements that may not be |
|
376 | - // enabled all in one place. In particular, this might |
|
377 | - // be the case for trusted elements. WARNING: care must |
|
378 | - // be taken that the /extra/ definitions are all safe. |
|
379 | - continue; |
|
380 | - } |
|
381 | - |
|
382 | - // attribute value expansions |
|
383 | - $this->attrCollections->performInclusions($def->attr); |
|
384 | - $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); |
|
385 | - |
|
386 | - // descendants_are_inline, for ChildDef_Chameleon |
|
387 | - if (is_string($def->content_model) && |
|
388 | - strpos($def->content_model, 'Inline') !== false) { |
|
389 | - if ($name != 'del' && $name != 'ins') { |
|
390 | - // this is for you, ins/del |
|
391 | - $def->descendants_are_inline = true; |
|
392 | - } |
|
393 | - } |
|
394 | - |
|
395 | - $this->contentSets->generateChildDef($def, $module); |
|
396 | - } |
|
397 | - |
|
398 | - // This can occur if there is a blank definition, but no base to |
|
399 | - // mix it in with |
|
400 | - if (!$def) return false; |
|
401 | - |
|
402 | - // add information on required attributes |
|
403 | - foreach ($def->attr as $attr_name => $attr_def) { |
|
404 | - if ($attr_def->required) { |
|
405 | - $def->required_attr[] = $attr_name; |
|
406 | - } |
|
407 | - } |
|
408 | - |
|
409 | - return $def; |
|
410 | - |
|
411 | - } |
|
6 | + /** |
|
7 | + * Instance of HTMLPurifier_DoctypeRegistry |
|
8 | + */ |
|
9 | + public $doctypes; |
|
10 | + |
|
11 | + /** |
|
12 | + * Instance of current doctype |
|
13 | + */ |
|
14 | + public $doctype; |
|
15 | + |
|
16 | + /** |
|
17 | + * Instance of HTMLPurifier_AttrTypes |
|
18 | + */ |
|
19 | + public $attrTypes; |
|
20 | + |
|
21 | + /** |
|
22 | + * Active instances of modules for the specified doctype are |
|
23 | + * indexed, by name, in this array. |
|
24 | + */ |
|
25 | + public $modules = array(); |
|
26 | + |
|
27 | + /** |
|
28 | + * Array of recognized HTMLPurifier_Module instances, indexed by |
|
29 | + * module's class name. This array is usually lazy loaded, but a |
|
30 | + * user can overload a module by pre-emptively registering it. |
|
31 | + */ |
|
32 | + public $registeredModules = array(); |
|
33 | + |
|
34 | + /** |
|
35 | + * List of extra modules that were added by the user using addModule(). |
|
36 | + * These get unconditionally merged into the current doctype, whatever |
|
37 | + * it may be. |
|
38 | + */ |
|
39 | + public $userModules = array(); |
|
40 | + |
|
41 | + /** |
|
42 | + * Associative array of element name to list of modules that have |
|
43 | + * definitions for the element; this array is dynamically filled. |
|
44 | + */ |
|
45 | + public $elementLookup = array(); |
|
46 | + |
|
47 | + /** List of prefixes we should use for registering small names */ |
|
48 | + public $prefixes = array('HTMLPurifier_HTMLModule_'); |
|
49 | + |
|
50 | + public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
51 | + public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ |
|
52 | + |
|
53 | + /** If set to true, unsafe elements and attributes will be allowed */ |
|
54 | + public $trusted = false; |
|
55 | + |
|
56 | + public function __construct() { |
|
57 | + |
|
58 | + // editable internal objects |
|
59 | + $this->attrTypes = new HTMLPurifier_AttrTypes(); |
|
60 | + $this->doctypes = new HTMLPurifier_DoctypeRegistry(); |
|
61 | + |
|
62 | + // setup basic modules |
|
63 | + $common = array( |
|
64 | + 'CommonAttributes', 'Text', 'Hypertext', 'List', |
|
65 | + 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', |
|
66 | + 'StyleAttribute', |
|
67 | + // Unsafe: |
|
68 | + 'Scripting', 'Object', 'Forms', |
|
69 | + // Sorta legacy, but present in strict: |
|
70 | + 'Name', |
|
71 | + ); |
|
72 | + $transitional = array('Legacy', 'Target', 'Iframe'); |
|
73 | + $xml = array('XMLCommonAttributes'); |
|
74 | + $non_xml = array('NonXMLCommonAttributes'); |
|
75 | + |
|
76 | + // setup basic doctypes |
|
77 | + $this->doctypes->register( |
|
78 | + 'HTML 4.01 Transitional', false, |
|
79 | + array_merge($common, $transitional, $non_xml), |
|
80 | + array('Tidy_Transitional', 'Tidy_Proprietary'), |
|
81 | + array(), |
|
82 | + '-//W3C//DTD HTML 4.01 Transitional//EN', |
|
83 | + 'http://www.w3.org/TR/html4/loose.dtd' |
|
84 | + ); |
|
85 | + |
|
86 | + $this->doctypes->register( |
|
87 | + 'HTML 4.01 Strict', false, |
|
88 | + array_merge($common, $non_xml), |
|
89 | + array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
90 | + array(), |
|
91 | + '-//W3C//DTD HTML 4.01//EN', |
|
92 | + 'http://www.w3.org/TR/html4/strict.dtd' |
|
93 | + ); |
|
94 | + |
|
95 | + $this->doctypes->register( |
|
96 | + 'XHTML 1.0 Transitional', true, |
|
97 | + array_merge($common, $transitional, $xml, $non_xml), |
|
98 | + array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), |
|
99 | + array(), |
|
100 | + '-//W3C//DTD XHTML 1.0 Transitional//EN', |
|
101 | + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' |
|
102 | + ); |
|
103 | + |
|
104 | + $this->doctypes->register( |
|
105 | + 'XHTML 1.0 Strict', true, |
|
106 | + array_merge($common, $xml, $non_xml), |
|
107 | + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), |
|
108 | + array(), |
|
109 | + '-//W3C//DTD XHTML 1.0 Strict//EN', |
|
110 | + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' |
|
111 | + ); |
|
112 | + |
|
113 | + $this->doctypes->register( |
|
114 | + 'XHTML 1.1', true, |
|
115 | + // Iframe is a real XHTML 1.1 module, despite being |
|
116 | + // "transitional"! |
|
117 | + array_merge($common, $xml, array('Ruby', 'Iframe')), |
|
118 | + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 |
|
119 | + array(), |
|
120 | + '-//W3C//DTD XHTML 1.1//EN', |
|
121 | + 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' |
|
122 | + ); |
|
123 | + |
|
124 | + } |
|
125 | + |
|
126 | + /** |
|
127 | + * Registers a module to the recognized module list, useful for |
|
128 | + * overloading pre-existing modules. |
|
129 | + * @param $module Mixed: string module name, with or without |
|
130 | + * HTMLPurifier_HTMLModule prefix, or instance of |
|
131 | + * subclass of HTMLPurifier_HTMLModule. |
|
132 | + * @param $overload Boolean whether or not to overload previous modules. |
|
133 | + * If this is not set, and you do overload a module, |
|
134 | + * HTML Purifier will complain with a warning. |
|
135 | + * @note This function will not call autoload, you must instantiate |
|
136 | + * (and thus invoke) autoload outside the method. |
|
137 | + * @note If a string is passed as a module name, different variants |
|
138 | + * will be tested in this order: |
|
139 | + * - Check for HTMLPurifier_HTMLModule_$name |
|
140 | + * - Check all prefixes with $name in order they were added |
|
141 | + * - Check for literal object name |
|
142 | + * - Throw fatal error |
|
143 | + * If your object name collides with an internal class, specify |
|
144 | + * your module manually. All modules must have been included |
|
145 | + * externally: registerModule will not perform inclusions for you! |
|
146 | + */ |
|
147 | + public function registerModule($module, $overload = false) { |
|
148 | + if (is_string($module)) { |
|
149 | + // attempt to load the module |
|
150 | + $original_module = $module; |
|
151 | + $ok = false; |
|
152 | + foreach ($this->prefixes as $prefix) { |
|
153 | + $module = $prefix . $original_module; |
|
154 | + if (class_exists($module)) { |
|
155 | + $ok = true; |
|
156 | + break; |
|
157 | + } |
|
158 | + } |
|
159 | + if (!$ok) { |
|
160 | + $module = $original_module; |
|
161 | + if (!class_exists($module)) { |
|
162 | + trigger_error($original_module . ' module does not exist', |
|
163 | + E_USER_ERROR); |
|
164 | + return; |
|
165 | + } |
|
166 | + } |
|
167 | + $module = new $module(); |
|
168 | + } |
|
169 | + if (empty($module->name)) { |
|
170 | + trigger_error('Module instance of ' . get_class($module) . ' must have name'); |
|
171 | + return; |
|
172 | + } |
|
173 | + if (!$overload && isset($this->registeredModules[$module->name])) { |
|
174 | + trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); |
|
175 | + } |
|
176 | + $this->registeredModules[$module->name] = $module; |
|
177 | + } |
|
178 | + |
|
179 | + /** |
|
180 | + * Adds a module to the current doctype by first registering it, |
|
181 | + * and then tacking it on to the active doctype |
|
182 | + */ |
|
183 | + public function addModule($module) { |
|
184 | + $this->registerModule($module); |
|
185 | + if (is_object($module)) $module = $module->name; |
|
186 | + $this->userModules[] = $module; |
|
187 | + } |
|
188 | + |
|
189 | + /** |
|
190 | + * Adds a class prefix that registerModule() will use to resolve a |
|
191 | + * string name to a concrete class |
|
192 | + */ |
|
193 | + public function addPrefix($prefix) { |
|
194 | + $this->prefixes[] = $prefix; |
|
195 | + } |
|
196 | + |
|
197 | + /** |
|
198 | + * Performs processing on modules, after being called you may |
|
199 | + * use getElement() and getElements() |
|
200 | + * @param $config Instance of HTMLPurifier_Config |
|
201 | + */ |
|
202 | + public function setup($config) { |
|
203 | + |
|
204 | + $this->trusted = $config->get('HTML.Trusted'); |
|
205 | + |
|
206 | + // generate |
|
207 | + $this->doctype = $this->doctypes->make($config); |
|
208 | + $modules = $this->doctype->modules; |
|
209 | + |
|
210 | + // take out the default modules that aren't allowed |
|
211 | + $lookup = $config->get('HTML.AllowedModules'); |
|
212 | + $special_cases = $config->get('HTML.CoreModules'); |
|
213 | + |
|
214 | + if (is_array($lookup)) { |
|
215 | + foreach ($modules as $k => $m) { |
|
216 | + if (isset($special_cases[$m])) continue; |
|
217 | + if (!isset($lookup[$m])) unset($modules[$k]); |
|
218 | + } |
|
219 | + } |
|
220 | + |
|
221 | + // custom modules |
|
222 | + if ($config->get('HTML.Proprietary')) { |
|
223 | + $modules[] = 'Proprietary'; |
|
224 | + } |
|
225 | + if ($config->get('HTML.SafeObject')) { |
|
226 | + $modules[] = 'SafeObject'; |
|
227 | + } |
|
228 | + if ($config->get('HTML.SafeEmbed')) { |
|
229 | + $modules[] = 'SafeEmbed'; |
|
230 | + } |
|
231 | + if ($config->get('HTML.Nofollow')) { |
|
232 | + $modules[] = 'Nofollow'; |
|
233 | + } |
|
234 | + if ($config->get('HTML.TargetBlank')) { |
|
235 | + $modules[] = 'TargetBlank'; |
|
236 | + } |
|
237 | + |
|
238 | + // merge in custom modules |
|
239 | + $modules = array_merge($modules, $this->userModules); |
|
240 | + |
|
241 | + foreach ($modules as $module) { |
|
242 | + $this->processModule($module); |
|
243 | + $this->modules[$module]->setup($config); |
|
244 | + } |
|
245 | + |
|
246 | + foreach ($this->doctype->tidyModules as $module) { |
|
247 | + $this->processModule($module); |
|
248 | + $this->modules[$module]->setup($config); |
|
249 | + } |
|
250 | + |
|
251 | + // prepare any injectors |
|
252 | + foreach ($this->modules as $module) { |
|
253 | + $n = array(); |
|
254 | + foreach ($module->info_injector as $i => $injector) { |
|
255 | + if (!is_object($injector)) { |
|
256 | + $class = "HTMLPurifier_Injector_$injector"; |
|
257 | + $injector = new $class; |
|
258 | + } |
|
259 | + $n[$injector->name] = $injector; |
|
260 | + } |
|
261 | + $module->info_injector = $n; |
|
262 | + } |
|
263 | + |
|
264 | + // setup lookup table based on all valid modules |
|
265 | + foreach ($this->modules as $module) { |
|
266 | + foreach ($module->info as $name => $def) { |
|
267 | + if (!isset($this->elementLookup[$name])) { |
|
268 | + $this->elementLookup[$name] = array(); |
|
269 | + } |
|
270 | + $this->elementLookup[$name][] = $module->name; |
|
271 | + } |
|
272 | + } |
|
273 | + |
|
274 | + // note the different choice |
|
275 | + $this->contentSets = new HTMLPurifier_ContentSets( |
|
276 | + // content set assembly deals with all possible modules, |
|
277 | + // not just ones deemed to be "safe" |
|
278 | + $this->modules |
|
279 | + ); |
|
280 | + $this->attrCollections = new HTMLPurifier_AttrCollections( |
|
281 | + $this->attrTypes, |
|
282 | + // there is no way to directly disable a global attribute, |
|
283 | + // but using AllowedAttributes or simply not including |
|
284 | + // the module in your custom doctype should be sufficient |
|
285 | + $this->modules |
|
286 | + ); |
|
287 | + } |
|
288 | + |
|
289 | + /** |
|
290 | + * Takes a module and adds it to the active module collection, |
|
291 | + * registering it if necessary. |
|
292 | + */ |
|
293 | + public function processModule($module) { |
|
294 | + if (!isset($this->registeredModules[$module]) || is_object($module)) { |
|
295 | + $this->registerModule($module); |
|
296 | + } |
|
297 | + $this->modules[$module] = $this->registeredModules[$module]; |
|
298 | + } |
|
299 | + |
|
300 | + /** |
|
301 | + * Retrieves merged element definitions. |
|
302 | + * @return Array of HTMLPurifier_ElementDef |
|
303 | + */ |
|
304 | + public function getElements() { |
|
305 | + |
|
306 | + $elements = array(); |
|
307 | + foreach ($this->modules as $module) { |
|
308 | + if (!$this->trusted && !$module->safe) continue; |
|
309 | + foreach ($module->info as $name => $v) { |
|
310 | + if (isset($elements[$name])) continue; |
|
311 | + $elements[$name] = $this->getElement($name); |
|
312 | + } |
|
313 | + } |
|
314 | + |
|
315 | + // remove dud elements, this happens when an element that |
|
316 | + // appeared to be safe actually wasn't |
|
317 | + foreach ($elements as $n => $v) { |
|
318 | + if ($v === false) unset($elements[$n]); |
|
319 | + } |
|
320 | + |
|
321 | + return $elements; |
|
322 | + |
|
323 | + } |
|
324 | + |
|
325 | + /** |
|
326 | + * Retrieves a single merged element definition |
|
327 | + * @param $name Name of element |
|
328 | + * @param $trusted Boolean trusted overriding parameter: set to true |
|
329 | + * if you want the full version of an element |
|
330 | + * @return Merged HTMLPurifier_ElementDef |
|
331 | + * @note You may notice that modules are getting iterated over twice (once |
|
332 | + * in getElements() and once here). This |
|
333 | + * is because |
|
334 | + */ |
|
335 | + public function getElement($name, $trusted = null) { |
|
336 | + |
|
337 | + if (!isset($this->elementLookup[$name])) { |
|
338 | + return false; |
|
339 | + } |
|
340 | + |
|
341 | + // setup global state variables |
|
342 | + $def = false; |
|
343 | + if ($trusted === null) $trusted = $this->trusted; |
|
344 | + |
|
345 | + // iterate through each module that has registered itself to this |
|
346 | + // element |
|
347 | + foreach($this->elementLookup[$name] as $module_name) { |
|
348 | + |
|
349 | + $module = $this->modules[$module_name]; |
|
350 | + |
|
351 | + // refuse to create/merge from a module that is deemed unsafe-- |
|
352 | + // pretend the module doesn't exist--when trusted mode is not on. |
|
353 | + if (!$trusted && !$module->safe) { |
|
354 | + continue; |
|
355 | + } |
|
356 | + |
|
357 | + // clone is used because, ideally speaking, the original |
|
358 | + // definition should not be modified. Usually, this will |
|
359 | + // make no difference, but for consistency's sake |
|
360 | + $new_def = clone $module->info[$name]; |
|
361 | + |
|
362 | + if (!$def && $new_def->standalone) { |
|
363 | + $def = $new_def; |
|
364 | + } elseif ($def) { |
|
365 | + // This will occur even if $new_def is standalone. In practice, |
|
366 | + // this will usually result in a full replacement. |
|
367 | + $def->mergeIn($new_def); |
|
368 | + } else { |
|
369 | + // :TODO: |
|
370 | + // non-standalone definitions that don't have a standalone |
|
371 | + // to merge into could be deferred to the end |
|
372 | + // HOWEVER, it is perfectly valid for a non-standalone |
|
373 | + // definition to lack a standalone definition, even |
|
374 | + // after all processing: this allows us to safely |
|
375 | + // specify extra attributes for elements that may not be |
|
376 | + // enabled all in one place. In particular, this might |
|
377 | + // be the case for trusted elements. WARNING: care must |
|
378 | + // be taken that the /extra/ definitions are all safe. |
|
379 | + continue; |
|
380 | + } |
|
381 | + |
|
382 | + // attribute value expansions |
|
383 | + $this->attrCollections->performInclusions($def->attr); |
|
384 | + $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); |
|
385 | + |
|
386 | + // descendants_are_inline, for ChildDef_Chameleon |
|
387 | + if (is_string($def->content_model) && |
|
388 | + strpos($def->content_model, 'Inline') !== false) { |
|
389 | + if ($name != 'del' && $name != 'ins') { |
|
390 | + // this is for you, ins/del |
|
391 | + $def->descendants_are_inline = true; |
|
392 | + } |
|
393 | + } |
|
394 | + |
|
395 | + $this->contentSets->generateChildDef($def, $module); |
|
396 | + } |
|
397 | + |
|
398 | + // This can occur if there is a blank definition, but no base to |
|
399 | + // mix it in with |
|
400 | + if (!$def) return false; |
|
401 | + |
|
402 | + // add information on required attributes |
|
403 | + foreach ($def->attr as $attr_name => $attr_def) { |
|
404 | + if ($attr_def->required) { |
|
405 | + $def->required_attr[] = $attr_name; |
|
406 | + } |
|
407 | + } |
|
408 | + |
|
409 | + return $def; |
|
410 | + |
|
411 | + } |
|
412 | 412 | |
413 | 413 | } |
414 | 414 |
@@ -182,7 +182,9 @@ discard block |
||
182 | 182 | */ |
183 | 183 | public function addModule($module) { |
184 | 184 | $this->registerModule($module); |
185 | - if (is_object($module)) $module = $module->name; |
|
185 | + if (is_object($module)) { |
|
186 | + $module = $module->name; |
|
187 | + } |
|
186 | 188 | $this->userModules[] = $module; |
187 | 189 | } |
188 | 190 | |
@@ -213,8 +215,12 @@ discard block |
||
213 | 215 | |
214 | 216 | if (is_array($lookup)) { |
215 | 217 | foreach ($modules as $k => $m) { |
216 | - if (isset($special_cases[$m])) continue; |
|
217 | - if (!isset($lookup[$m])) unset($modules[$k]); |
|
218 | + if (isset($special_cases[$m])) { |
|
219 | + continue; |
|
220 | + } |
|
221 | + if (!isset($lookup[$m])) { |
|
222 | + unset($modules[$k]); |
|
223 | + } |
|
218 | 224 | } |
219 | 225 | } |
220 | 226 | |
@@ -305,9 +311,13 @@ discard block |
||
305 | 311 | |
306 | 312 | $elements = array(); |
307 | 313 | foreach ($this->modules as $module) { |
308 | - if (!$this->trusted && !$module->safe) continue; |
|
314 | + if (!$this->trusted && !$module->safe) { |
|
315 | + continue; |
|
316 | + } |
|
309 | 317 | foreach ($module->info as $name => $v) { |
310 | - if (isset($elements[$name])) continue; |
|
318 | + if (isset($elements[$name])) { |
|
319 | + continue; |
|
320 | + } |
|
311 | 321 | $elements[$name] = $this->getElement($name); |
312 | 322 | } |
313 | 323 | } |
@@ -315,7 +325,9 @@ discard block |
||
315 | 325 | // remove dud elements, this happens when an element that |
316 | 326 | // appeared to be safe actually wasn't |
317 | 327 | foreach ($elements as $n => $v) { |
318 | - if ($v === false) unset($elements[$n]); |
|
328 | + if ($v === false) { |
|
329 | + unset($elements[$n]); |
|
330 | + } |
|
319 | 331 | } |
320 | 332 | |
321 | 333 | return $elements; |
@@ -340,7 +352,9 @@ discard block |
||
340 | 352 | |
341 | 353 | // setup global state variables |
342 | 354 | $def = false; |
343 | - if ($trusted === null) $trusted = $this->trusted; |
|
355 | + if ($trusted === null) { |
|
356 | + $trusted = $this->trusted; |
|
357 | + } |
|
344 | 358 | |
345 | 359 | // iterate through each module that has registered itself to this |
346 | 360 | // element |
@@ -397,7 +411,9 @@ discard block |
||
397 | 411 | |
398 | 412 | // This can occur if there is a blank definition, but no base to |
399 | 413 | // mix it in with |
400 | - if (!$def) return false; |
|
414 | + if (!$def) { |
|
415 | + return false; |
|
416 | + } |
|
401 | 417 | |
402 | 418 | // add information on required attributes |
403 | 419 | foreach ($def->attr as $attr_name => $attr_def) { |
@@ -47,7 +47,7 @@ discard block |
||
47 | 47 | /** List of prefixes we should use for registering small names */ |
48 | 48 | public $prefixes = array('HTMLPurifier_HTMLModule_'); |
49 | 49 | |
50 | - public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
50 | + public $contentSets; /**< Instance of HTMLPurifier_ContentSets */ |
|
51 | 51 | public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ |
52 | 52 | |
53 | 53 | /** If set to true, unsafe elements and attributes will be allowed */ |
@@ -150,7 +150,7 @@ discard block |
||
150 | 150 | $original_module = $module; |
151 | 151 | $ok = false; |
152 | 152 | foreach ($this->prefixes as $prefix) { |
153 | - $module = $prefix . $original_module; |
|
153 | + $module = $prefix.$original_module; |
|
154 | 154 | if (class_exists($module)) { |
155 | 155 | $ok = true; |
156 | 156 | break; |
@@ -159,7 +159,7 @@ discard block |
||
159 | 159 | if (!$ok) { |
160 | 160 | $module = $original_module; |
161 | 161 | if (!class_exists($module)) { |
162 | - trigger_error($original_module . ' module does not exist', |
|
162 | + trigger_error($original_module.' module does not exist', |
|
163 | 163 | E_USER_ERROR); |
164 | 164 | return; |
165 | 165 | } |
@@ -167,11 +167,11 @@ discard block |
||
167 | 167 | $module = new $module(); |
168 | 168 | } |
169 | 169 | if (empty($module->name)) { |
170 | - trigger_error('Module instance of ' . get_class($module) . ' must have name'); |
|
170 | + trigger_error('Module instance of '.get_class($module).' must have name'); |
|
171 | 171 | return; |
172 | 172 | } |
173 | 173 | if (!$overload && isset($this->registeredModules[$module->name])) { |
174 | - trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); |
|
174 | + trigger_error('Overloading '.$module->name.' without explicit overload parameter', E_USER_WARNING); |
|
175 | 175 | } |
176 | 176 | $this->registeredModules[$module->name] = $module; |
177 | 177 | } |
@@ -344,7 +344,7 @@ discard block |
||
344 | 344 | |
345 | 345 | // iterate through each module that has registered itself to this |
346 | 346 | // element |
347 | - foreach($this->elementLookup[$name] as $module_name) { |
|
347 | + foreach ($this->elementLookup[$name] as $module_name) { |
|
348 | 348 | |
349 | 349 | $module = $this->modules[$module_name]; |
350 | 350 |
@@ -19,7 +19,7 @@ |
||
19 | 19 | * Builds an IDAccumulator, also initializing the default blacklist |
20 | 20 | * @param $config Instance of HTMLPurifier_Config |
21 | 21 | * @param $context Instance of HTMLPurifier_Context |
22 | - * @return Fully initialized HTMLPurifier_IDAccumulator |
|
22 | + * @return HTMLPurifier_IDAccumulator initialized HTMLPurifier_IDAccumulator |
|
23 | 23 | */ |
24 | 24 | public static function build($config, $context) { |
25 | 25 | $id_accumulator = new HTMLPurifier_IDAccumulator(); |
@@ -9,44 +9,44 @@ |
||
9 | 9 | class HTMLPurifier_IDAccumulator |
10 | 10 | { |
11 | 11 | |
12 | - /** |
|
13 | - * Lookup table of IDs we've accumulated. |
|
14 | - * @public |
|
15 | - */ |
|
16 | - public $ids = array(); |
|
12 | + /** |
|
13 | + * Lookup table of IDs we've accumulated. |
|
14 | + * @public |
|
15 | + */ |
|
16 | + public $ids = array(); |
|
17 | 17 | |
18 | - /** |
|
19 | - * Builds an IDAccumulator, also initializing the default blacklist |
|
20 | - * @param $config Instance of HTMLPurifier_Config |
|
21 | - * @param $context Instance of HTMLPurifier_Context |
|
22 | - * @return Fully initialized HTMLPurifier_IDAccumulator |
|
23 | - */ |
|
24 | - public static function build($config, $context) { |
|
25 | - $id_accumulator = new HTMLPurifier_IDAccumulator(); |
|
26 | - $id_accumulator->load($config->get('Attr.IDBlacklist')); |
|
27 | - return $id_accumulator; |
|
28 | - } |
|
18 | + /** |
|
19 | + * Builds an IDAccumulator, also initializing the default blacklist |
|
20 | + * @param $config Instance of HTMLPurifier_Config |
|
21 | + * @param $context Instance of HTMLPurifier_Context |
|
22 | + * @return Fully initialized HTMLPurifier_IDAccumulator |
|
23 | + */ |
|
24 | + public static function build($config, $context) { |
|
25 | + $id_accumulator = new HTMLPurifier_IDAccumulator(); |
|
26 | + $id_accumulator->load($config->get('Attr.IDBlacklist')); |
|
27 | + return $id_accumulator; |
|
28 | + } |
|
29 | 29 | |
30 | - /** |
|
31 | - * Add an ID to the lookup table. |
|
32 | - * @param $id ID to be added. |
|
33 | - * @return Bool status, true if success, false if there's a dupe |
|
34 | - */ |
|
35 | - public function add($id) { |
|
36 | - if (isset($this->ids[$id])) return false; |
|
37 | - return $this->ids[$id] = true; |
|
38 | - } |
|
30 | + /** |
|
31 | + * Add an ID to the lookup table. |
|
32 | + * @param $id ID to be added. |
|
33 | + * @return Bool status, true if success, false if there's a dupe |
|
34 | + */ |
|
35 | + public function add($id) { |
|
36 | + if (isset($this->ids[$id])) return false; |
|
37 | + return $this->ids[$id] = true; |
|
38 | + } |
|
39 | 39 | |
40 | - /** |
|
41 | - * Load a list of IDs into the lookup table |
|
42 | - * @param $array_of_ids Array of IDs to load |
|
43 | - * @note This function doesn't care about duplicates |
|
44 | - */ |
|
45 | - public function load($array_of_ids) { |
|
46 | - foreach ($array_of_ids as $id) { |
|
47 | - $this->ids[$id] = true; |
|
48 | - } |
|
49 | - } |
|
40 | + /** |
|
41 | + * Load a list of IDs into the lookup table |
|
42 | + * @param $array_of_ids Array of IDs to load |
|
43 | + * @note This function doesn't care about duplicates |
|
44 | + */ |
|
45 | + public function load($array_of_ids) { |
|
46 | + foreach ($array_of_ids as $id) { |
|
47 | + $this->ids[$id] = true; |
|
48 | + } |
|
49 | + } |
|
50 | 50 | |
51 | 51 | } |
52 | 52 |
@@ -33,7 +33,9 @@ |
||
33 | 33 | * @return Bool status, true if success, false if there's a dupe |
34 | 34 | */ |
35 | 35 | public function add($id) { |
36 | - if (isset($this->ids[$id])) return false; |
|
36 | + if (isset($this->ids[$id])) { |
|
37 | + return false; |
|
38 | + } |
|
37 | 39 | return $this->ids[$id] = true; |
38 | 40 | } |
39 | 41 |
@@ -64,6 +64,7 @@ discard block |
||
64 | 64 | * result in infinite loops if not used carefully. |
65 | 65 | * @warning HTML Purifier will prevent you from fast-forwarding with this |
66 | 66 | * function. |
67 | + * @param integer $index |
|
67 | 68 | */ |
68 | 69 | public function rewind($index) { |
69 | 70 | $this->rewind = $index; |
@@ -123,8 +124,8 @@ discard block |
||
123 | 124 | |
124 | 125 | /** |
125 | 126 | * Tests if the context node allows a certain element |
126 | - * @param $name Name of element to test for |
|
127 | - * @return True if element is allowed, false if it is not |
|
127 | + * @param string $name Name of element to test for |
|
128 | + * @return boolean if element is allowed, false if it is not |
|
128 | 129 | */ |
129 | 130 | public function allowsElement($name) { |
130 | 131 | if (!empty($this->currentNesting)) { |
@@ -16,222 +16,222 @@ |
||
16 | 16 | abstract class HTMLPurifier_Injector |
17 | 17 | { |
18 | 18 | |
19 | - /** |
|
20 | - * Advisory name of injector, this is for friendly error messages |
|
21 | - */ |
|
22 | - public $name; |
|
23 | - |
|
24 | - /** |
|
25 | - * Instance of HTMLPurifier_HTMLDefinition |
|
26 | - */ |
|
27 | - protected $htmlDefinition; |
|
28 | - |
|
29 | - /** |
|
30 | - * Reference to CurrentNesting variable in Context. This is an array |
|
31 | - * list of tokens that we are currently "inside" |
|
32 | - */ |
|
33 | - protected $currentNesting; |
|
34 | - |
|
35 | - /** |
|
36 | - * Reference to InputTokens variable in Context. This is an array |
|
37 | - * list of the input tokens that are being processed. |
|
38 | - */ |
|
39 | - protected $inputTokens; |
|
40 | - |
|
41 | - /** |
|
42 | - * Reference to InputIndex variable in Context. This is an integer |
|
43 | - * array index for $this->inputTokens that indicates what token |
|
44 | - * is currently being processed. |
|
45 | - */ |
|
46 | - protected $inputIndex; |
|
47 | - |
|
48 | - /** |
|
49 | - * Array of elements and attributes this injector creates and therefore |
|
50 | - * need to be allowed by the definition. Takes form of |
|
51 | - * array('element' => array('attr', 'attr2'), 'element2') |
|
52 | - */ |
|
53 | - public $needed = array(); |
|
54 | - |
|
55 | - /** |
|
56 | - * Index of inputTokens to rewind to. |
|
57 | - */ |
|
58 | - protected $rewind = false; |
|
59 | - |
|
60 | - /** |
|
61 | - * Rewind to a spot to re-perform processing. This is useful if you |
|
62 | - * deleted a node, and now need to see if this change affected any |
|
63 | - * earlier nodes. Rewinding does not affect other injectors, and can |
|
64 | - * result in infinite loops if not used carefully. |
|
65 | - * @warning HTML Purifier will prevent you from fast-forwarding with this |
|
66 | - * function. |
|
67 | - */ |
|
68 | - public function rewind($index) { |
|
69 | - $this->rewind = $index; |
|
70 | - } |
|
71 | - |
|
72 | - /** |
|
73 | - * Retrieves rewind, and then unsets it. |
|
74 | - */ |
|
75 | - public function getRewind() { |
|
76 | - $r = $this->rewind; |
|
77 | - $this->rewind = false; |
|
78 | - return $r; |
|
79 | - } |
|
80 | - |
|
81 | - /** |
|
82 | - * Prepares the injector by giving it the config and context objects: |
|
83 | - * this allows references to important variables to be made within |
|
84 | - * the injector. This function also checks if the HTML environment |
|
85 | - * will work with the Injector (see checkNeeded()). |
|
86 | - * @param $config Instance of HTMLPurifier_Config |
|
87 | - * @param $context Instance of HTMLPurifier_Context |
|
88 | - * @return Boolean false if success, string of missing needed element/attribute if failure |
|
89 | - */ |
|
90 | - public function prepare($config, $context) { |
|
91 | - $this->htmlDefinition = $config->getHTMLDefinition(); |
|
92 | - // Even though this might fail, some unit tests ignore this and |
|
93 | - // still test checkNeeded, so be careful. Maybe get rid of that |
|
94 | - // dependency. |
|
95 | - $result = $this->checkNeeded($config); |
|
96 | - if ($result !== false) return $result; |
|
97 | - $this->currentNesting =& $context->get('CurrentNesting'); |
|
98 | - $this->inputTokens =& $context->get('InputTokens'); |
|
99 | - $this->inputIndex =& $context->get('InputIndex'); |
|
100 | - return false; |
|
101 | - } |
|
102 | - |
|
103 | - /** |
|
104 | - * This function checks if the HTML environment |
|
105 | - * will work with the Injector: if p tags are not allowed, the |
|
106 | - * Auto-Paragraphing injector should not be enabled. |
|
107 | - * @param $config Instance of HTMLPurifier_Config |
|
108 | - * @param $context Instance of HTMLPurifier_Context |
|
109 | - * @return Boolean false if success, string of missing needed element/attribute if failure |
|
110 | - */ |
|
111 | - public function checkNeeded($config) { |
|
112 | - $def = $config->getHTMLDefinition(); |
|
113 | - foreach ($this->needed as $element => $attributes) { |
|
114 | - if (is_int($element)) $element = $attributes; |
|
115 | - if (!isset($def->info[$element])) return $element; |
|
116 | - if (!is_array($attributes)) continue; |
|
117 | - foreach ($attributes as $name) { |
|
118 | - if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; |
|
119 | - } |
|
120 | - } |
|
121 | - return false; |
|
122 | - } |
|
123 | - |
|
124 | - /** |
|
125 | - * Tests if the context node allows a certain element |
|
126 | - * @param $name Name of element to test for |
|
127 | - * @return True if element is allowed, false if it is not |
|
128 | - */ |
|
129 | - public function allowsElement($name) { |
|
130 | - if (!empty($this->currentNesting)) { |
|
131 | - $parent_token = array_pop($this->currentNesting); |
|
132 | - $this->currentNesting[] = $parent_token; |
|
133 | - $parent = $this->htmlDefinition->info[$parent_token->name]; |
|
134 | - } else { |
|
135 | - $parent = $this->htmlDefinition->info_parent_def; |
|
136 | - } |
|
137 | - if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { |
|
138 | - return false; |
|
139 | - } |
|
140 | - // check for exclusion |
|
141 | - for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { |
|
142 | - $node = $this->currentNesting[$i]; |
|
143 | - $def = $this->htmlDefinition->info[$node->name]; |
|
144 | - if (isset($def->excludes[$name])) return false; |
|
145 | - } |
|
146 | - return true; |
|
147 | - } |
|
148 | - |
|
149 | - /** |
|
150 | - * Iterator function, which starts with the next token and continues until |
|
151 | - * you reach the end of the input tokens. |
|
152 | - * @warning Please prevent previous references from interfering with this |
|
153 | - * functions by setting $i = null beforehand! |
|
154 | - * @param &$i Current integer index variable for inputTokens |
|
155 | - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
156 | - */ |
|
157 | - protected function forward(&$i, &$current) { |
|
158 | - if ($i === null) $i = $this->inputIndex + 1; |
|
159 | - else $i++; |
|
160 | - if (!isset($this->inputTokens[$i])) return false; |
|
161 | - $current = $this->inputTokens[$i]; |
|
162 | - return true; |
|
163 | - } |
|
164 | - |
|
165 | - /** |
|
166 | - * Similar to _forward, but accepts a third parameter $nesting (which |
|
167 | - * should be initialized at 0) and stops when we hit the end tag |
|
168 | - * for the node $this->inputIndex starts in. |
|
169 | - */ |
|
170 | - protected function forwardUntilEndToken(&$i, &$current, &$nesting) { |
|
171 | - $result = $this->forward($i, $current); |
|
172 | - if (!$result) return false; |
|
173 | - if ($nesting === null) $nesting = 0; |
|
174 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
175 | - elseif ($current instanceof HTMLPurifier_Token_End) { |
|
176 | - if ($nesting <= 0) return false; |
|
177 | - $nesting--; |
|
178 | - } |
|
179 | - return true; |
|
180 | - } |
|
181 | - |
|
182 | - /** |
|
183 | - * Iterator function, starts with the previous token and continues until |
|
184 | - * you reach the beginning of input tokens. |
|
185 | - * @warning Please prevent previous references from interfering with this |
|
186 | - * functions by setting $i = null beforehand! |
|
187 | - * @param &$i Current integer index variable for inputTokens |
|
188 | - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
189 | - */ |
|
190 | - protected function backward(&$i, &$current) { |
|
191 | - if ($i === null) $i = $this->inputIndex - 1; |
|
192 | - else $i--; |
|
193 | - if ($i < 0) return false; |
|
194 | - $current = $this->inputTokens[$i]; |
|
195 | - return true; |
|
196 | - } |
|
197 | - |
|
198 | - /** |
|
199 | - * Initializes the iterator at the current position. Use in a do {} while; |
|
200 | - * loop to force the _forward and _backward functions to start at the |
|
201 | - * current location. |
|
202 | - * @warning Please prevent previous references from interfering with this |
|
203 | - * functions by setting $i = null beforehand! |
|
204 | - * @param &$i Current integer index variable for inputTokens |
|
205 | - * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
206 | - */ |
|
207 | - protected function current(&$i, &$current) { |
|
208 | - if ($i === null) $i = $this->inputIndex; |
|
209 | - $current = $this->inputTokens[$i]; |
|
210 | - } |
|
211 | - |
|
212 | - /** |
|
213 | - * Handler that is called when a text token is processed |
|
214 | - */ |
|
215 | - public function handleText(&$token) {} |
|
216 | - |
|
217 | - /** |
|
218 | - * Handler that is called when a start or empty token is processed |
|
219 | - */ |
|
220 | - public function handleElement(&$token) {} |
|
221 | - |
|
222 | - /** |
|
223 | - * Handler that is called when an end token is processed |
|
224 | - */ |
|
225 | - public function handleEnd(&$token) { |
|
226 | - $this->notifyEnd($token); |
|
227 | - } |
|
228 | - |
|
229 | - /** |
|
230 | - * Notifier that is called when an end token is processed |
|
231 | - * @note This differs from handlers in that the token is read-only |
|
232 | - * @deprecated |
|
233 | - */ |
|
234 | - public function notifyEnd($token) {} |
|
19 | + /** |
|
20 | + * Advisory name of injector, this is for friendly error messages |
|
21 | + */ |
|
22 | + public $name; |
|
23 | + |
|
24 | + /** |
|
25 | + * Instance of HTMLPurifier_HTMLDefinition |
|
26 | + */ |
|
27 | + protected $htmlDefinition; |
|
28 | + |
|
29 | + /** |
|
30 | + * Reference to CurrentNesting variable in Context. This is an array |
|
31 | + * list of tokens that we are currently "inside" |
|
32 | + */ |
|
33 | + protected $currentNesting; |
|
34 | + |
|
35 | + /** |
|
36 | + * Reference to InputTokens variable in Context. This is an array |
|
37 | + * list of the input tokens that are being processed. |
|
38 | + */ |
|
39 | + protected $inputTokens; |
|
40 | + |
|
41 | + /** |
|
42 | + * Reference to InputIndex variable in Context. This is an integer |
|
43 | + * array index for $this->inputTokens that indicates what token |
|
44 | + * is currently being processed. |
|
45 | + */ |
|
46 | + protected $inputIndex; |
|
47 | + |
|
48 | + /** |
|
49 | + * Array of elements and attributes this injector creates and therefore |
|
50 | + * need to be allowed by the definition. Takes form of |
|
51 | + * array('element' => array('attr', 'attr2'), 'element2') |
|
52 | + */ |
|
53 | + public $needed = array(); |
|
54 | + |
|
55 | + /** |
|
56 | + * Index of inputTokens to rewind to. |
|
57 | + */ |
|
58 | + protected $rewind = false; |
|
59 | + |
|
60 | + /** |
|
61 | + * Rewind to a spot to re-perform processing. This is useful if you |
|
62 | + * deleted a node, and now need to see if this change affected any |
|
63 | + * earlier nodes. Rewinding does not affect other injectors, and can |
|
64 | + * result in infinite loops if not used carefully. |
|
65 | + * @warning HTML Purifier will prevent you from fast-forwarding with this |
|
66 | + * function. |
|
67 | + */ |
|
68 | + public function rewind($index) { |
|
69 | + $this->rewind = $index; |
|
70 | + } |
|
71 | + |
|
72 | + /** |
|
73 | + * Retrieves rewind, and then unsets it. |
|
74 | + */ |
|
75 | + public function getRewind() { |
|
76 | + $r = $this->rewind; |
|
77 | + $this->rewind = false; |
|
78 | + return $r; |
|
79 | + } |
|
80 | + |
|
81 | + /** |
|
82 | + * Prepares the injector by giving it the config and context objects: |
|
83 | + * this allows references to important variables to be made within |
|
84 | + * the injector. This function also checks if the HTML environment |
|
85 | + * will work with the Injector (see checkNeeded()). |
|
86 | + * @param $config Instance of HTMLPurifier_Config |
|
87 | + * @param $context Instance of HTMLPurifier_Context |
|
88 | + * @return Boolean false if success, string of missing needed element/attribute if failure |
|
89 | + */ |
|
90 | + public function prepare($config, $context) { |
|
91 | + $this->htmlDefinition = $config->getHTMLDefinition(); |
|
92 | + // Even though this might fail, some unit tests ignore this and |
|
93 | + // still test checkNeeded, so be careful. Maybe get rid of that |
|
94 | + // dependency. |
|
95 | + $result = $this->checkNeeded($config); |
|
96 | + if ($result !== false) return $result; |
|
97 | + $this->currentNesting =& $context->get('CurrentNesting'); |
|
98 | + $this->inputTokens =& $context->get('InputTokens'); |
|
99 | + $this->inputIndex =& $context->get('InputIndex'); |
|
100 | + return false; |
|
101 | + } |
|
102 | + |
|
103 | + /** |
|
104 | + * This function checks if the HTML environment |
|
105 | + * will work with the Injector: if p tags are not allowed, the |
|
106 | + * Auto-Paragraphing injector should not be enabled. |
|
107 | + * @param $config Instance of HTMLPurifier_Config |
|
108 | + * @param $context Instance of HTMLPurifier_Context |
|
109 | + * @return Boolean false if success, string of missing needed element/attribute if failure |
|
110 | + */ |
|
111 | + public function checkNeeded($config) { |
|
112 | + $def = $config->getHTMLDefinition(); |
|
113 | + foreach ($this->needed as $element => $attributes) { |
|
114 | + if (is_int($element)) $element = $attributes; |
|
115 | + if (!isset($def->info[$element])) return $element; |
|
116 | + if (!is_array($attributes)) continue; |
|
117 | + foreach ($attributes as $name) { |
|
118 | + if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; |
|
119 | + } |
|
120 | + } |
|
121 | + return false; |
|
122 | + } |
|
123 | + |
|
124 | + /** |
|
125 | + * Tests if the context node allows a certain element |
|
126 | + * @param $name Name of element to test for |
|
127 | + * @return True if element is allowed, false if it is not |
|
128 | + */ |
|
129 | + public function allowsElement($name) { |
|
130 | + if (!empty($this->currentNesting)) { |
|
131 | + $parent_token = array_pop($this->currentNesting); |
|
132 | + $this->currentNesting[] = $parent_token; |
|
133 | + $parent = $this->htmlDefinition->info[$parent_token->name]; |
|
134 | + } else { |
|
135 | + $parent = $this->htmlDefinition->info_parent_def; |
|
136 | + } |
|
137 | + if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { |
|
138 | + return false; |
|
139 | + } |
|
140 | + // check for exclusion |
|
141 | + for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { |
|
142 | + $node = $this->currentNesting[$i]; |
|
143 | + $def = $this->htmlDefinition->info[$node->name]; |
|
144 | + if (isset($def->excludes[$name])) return false; |
|
145 | + } |
|
146 | + return true; |
|
147 | + } |
|
148 | + |
|
149 | + /** |
|
150 | + * Iterator function, which starts with the next token and continues until |
|
151 | + * you reach the end of the input tokens. |
|
152 | + * @warning Please prevent previous references from interfering with this |
|
153 | + * functions by setting $i = null beforehand! |
|
154 | + * @param &$i Current integer index variable for inputTokens |
|
155 | + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
156 | + */ |
|
157 | + protected function forward(&$i, &$current) { |
|
158 | + if ($i === null) $i = $this->inputIndex + 1; |
|
159 | + else $i++; |
|
160 | + if (!isset($this->inputTokens[$i])) return false; |
|
161 | + $current = $this->inputTokens[$i]; |
|
162 | + return true; |
|
163 | + } |
|
164 | + |
|
165 | + /** |
|
166 | + * Similar to _forward, but accepts a third parameter $nesting (which |
|
167 | + * should be initialized at 0) and stops when we hit the end tag |
|
168 | + * for the node $this->inputIndex starts in. |
|
169 | + */ |
|
170 | + protected function forwardUntilEndToken(&$i, &$current, &$nesting) { |
|
171 | + $result = $this->forward($i, $current); |
|
172 | + if (!$result) return false; |
|
173 | + if ($nesting === null) $nesting = 0; |
|
174 | + if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
175 | + elseif ($current instanceof HTMLPurifier_Token_End) { |
|
176 | + if ($nesting <= 0) return false; |
|
177 | + $nesting--; |
|
178 | + } |
|
179 | + return true; |
|
180 | + } |
|
181 | + |
|
182 | + /** |
|
183 | + * Iterator function, starts with the previous token and continues until |
|
184 | + * you reach the beginning of input tokens. |
|
185 | + * @warning Please prevent previous references from interfering with this |
|
186 | + * functions by setting $i = null beforehand! |
|
187 | + * @param &$i Current integer index variable for inputTokens |
|
188 | + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
189 | + */ |
|
190 | + protected function backward(&$i, &$current) { |
|
191 | + if ($i === null) $i = $this->inputIndex - 1; |
|
192 | + else $i--; |
|
193 | + if ($i < 0) return false; |
|
194 | + $current = $this->inputTokens[$i]; |
|
195 | + return true; |
|
196 | + } |
|
197 | + |
|
198 | + /** |
|
199 | + * Initializes the iterator at the current position. Use in a do {} while; |
|
200 | + * loop to force the _forward and _backward functions to start at the |
|
201 | + * current location. |
|
202 | + * @warning Please prevent previous references from interfering with this |
|
203 | + * functions by setting $i = null beforehand! |
|
204 | + * @param &$i Current integer index variable for inputTokens |
|
205 | + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
|
206 | + */ |
|
207 | + protected function current(&$i, &$current) { |
|
208 | + if ($i === null) $i = $this->inputIndex; |
|
209 | + $current = $this->inputTokens[$i]; |
|
210 | + } |
|
211 | + |
|
212 | + /** |
|
213 | + * Handler that is called when a text token is processed |
|
214 | + */ |
|
215 | + public function handleText(&$token) {} |
|
216 | + |
|
217 | + /** |
|
218 | + * Handler that is called when a start or empty token is processed |
|
219 | + */ |
|
220 | + public function handleElement(&$token) {} |
|
221 | + |
|
222 | + /** |
|
223 | + * Handler that is called when an end token is processed |
|
224 | + */ |
|
225 | + public function handleEnd(&$token) { |
|
226 | + $this->notifyEnd($token); |
|
227 | + } |
|
228 | + |
|
229 | + /** |
|
230 | + * Notifier that is called when an end token is processed |
|
231 | + * @note This differs from handlers in that the token is read-only |
|
232 | + * @deprecated |
|
233 | + */ |
|
234 | + public function notifyEnd($token) {} |
|
235 | 235 | |
236 | 236 | |
237 | 237 | } |
@@ -94,9 +94,9 @@ discard block |
||
94 | 94 | // dependency. |
95 | 95 | $result = $this->checkNeeded($config); |
96 | 96 | if ($result !== false) return $result; |
97 | - $this->currentNesting =& $context->get('CurrentNesting'); |
|
98 | - $this->inputTokens =& $context->get('InputTokens'); |
|
99 | - $this->inputIndex =& $context->get('InputIndex'); |
|
97 | + $this->currentNesting = & $context->get('CurrentNesting'); |
|
98 | + $this->inputTokens = & $context->get('InputTokens'); |
|
99 | + $this->inputIndex = & $context->get('InputIndex'); |
|
100 | 100 | return false; |
101 | 101 | } |
102 | 102 | |
@@ -171,7 +171,7 @@ discard block |
||
171 | 171 | $result = $this->forward($i, $current); |
172 | 172 | if (!$result) return false; |
173 | 173 | if ($nesting === null) $nesting = 0; |
174 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
174 | + if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
175 | 175 | elseif ($current instanceof HTMLPurifier_Token_End) { |
176 | 176 | if ($nesting <= 0) return false; |
177 | 177 | $nesting--; |
@@ -93,7 +93,9 @@ discard block |
||
93 | 93 | // still test checkNeeded, so be careful. Maybe get rid of that |
94 | 94 | // dependency. |
95 | 95 | $result = $this->checkNeeded($config); |
96 | - if ($result !== false) return $result; |
|
96 | + if ($result !== false) { |
|
97 | + return $result; |
|
98 | + } |
|
97 | 99 | $this->currentNesting =& $context->get('CurrentNesting'); |
98 | 100 | $this->inputTokens =& $context->get('InputTokens'); |
99 | 101 | $this->inputIndex =& $context->get('InputIndex'); |
@@ -111,11 +113,19 @@ discard block |
||
111 | 113 | public function checkNeeded($config) { |
112 | 114 | $def = $config->getHTMLDefinition(); |
113 | 115 | foreach ($this->needed as $element => $attributes) { |
114 | - if (is_int($element)) $element = $attributes; |
|
115 | - if (!isset($def->info[$element])) return $element; |
|
116 | - if (!is_array($attributes)) continue; |
|
116 | + if (is_int($element)) { |
|
117 | + $element = $attributes; |
|
118 | + } |
|
119 | + if (!isset($def->info[$element])) { |
|
120 | + return $element; |
|
121 | + } |
|
122 | + if (!is_array($attributes)) { |
|
123 | + continue; |
|
124 | + } |
|
117 | 125 | foreach ($attributes as $name) { |
118 | - if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; |
|
126 | + if (!isset($def->info[$element]->attr[$name])) { |
|
127 | + return "$element.$name"; |
|
128 | + } |
|
119 | 129 | } |
120 | 130 | } |
121 | 131 | return false; |
@@ -141,7 +151,9 @@ discard block |
||
141 | 151 | for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { |
142 | 152 | $node = $this->currentNesting[$i]; |
143 | 153 | $def = $this->htmlDefinition->info[$node->name]; |
144 | - if (isset($def->excludes[$name])) return false; |
|
154 | + if (isset($def->excludes[$name])) { |
|
155 | + return false; |
|
156 | + } |
|
145 | 157 | } |
146 | 158 | return true; |
147 | 159 | } |
@@ -155,9 +167,14 @@ discard block |
||
155 | 167 | * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
156 | 168 | */ |
157 | 169 | protected function forward(&$i, &$current) { |
158 | - if ($i === null) $i = $this->inputIndex + 1; |
|
159 | - else $i++; |
|
160 | - if (!isset($this->inputTokens[$i])) return false; |
|
170 | + if ($i === null) { |
|
171 | + $i = $this->inputIndex + 1; |
|
172 | + } else { |
|
173 | + $i++; |
|
174 | + } |
|
175 | + if (!isset($this->inputTokens[$i])) { |
|
176 | + return false; |
|
177 | + } |
|
161 | 178 | $current = $this->inputTokens[$i]; |
162 | 179 | return true; |
163 | 180 | } |
@@ -169,11 +186,18 @@ discard block |
||
169 | 186 | */ |
170 | 187 | protected function forwardUntilEndToken(&$i, &$current, &$nesting) { |
171 | 188 | $result = $this->forward($i, $current); |
172 | - if (!$result) return false; |
|
173 | - if ($nesting === null) $nesting = 0; |
|
174 | - if ($current instanceof HTMLPurifier_Token_Start) $nesting++; |
|
175 | - elseif ($current instanceof HTMLPurifier_Token_End) { |
|
176 | - if ($nesting <= 0) return false; |
|
189 | + if (!$result) { |
|
190 | + return false; |
|
191 | + } |
|
192 | + if ($nesting === null) { |
|
193 | + $nesting = 0; |
|
194 | + } |
|
195 | + if ($current instanceof HTMLPurifier_Token_Start) { |
|
196 | + $nesting++; |
|
197 | + } elseif ($current instanceof HTMLPurifier_Token_End) { |
|
198 | + if ($nesting <= 0) { |
|
199 | + return false; |
|
200 | + } |
|
177 | 201 | $nesting--; |
178 | 202 | } |
179 | 203 | return true; |
@@ -188,9 +212,14 @@ discard block |
||
188 | 212 | * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
189 | 213 | */ |
190 | 214 | protected function backward(&$i, &$current) { |
191 | - if ($i === null) $i = $this->inputIndex - 1; |
|
192 | - else $i--; |
|
193 | - if ($i < 0) return false; |
|
215 | + if ($i === null) { |
|
216 | + $i = $this->inputIndex - 1; |
|
217 | + } else { |
|
218 | + $i--; |
|
219 | + } |
|
220 | + if ($i < 0) { |
|
221 | + return false; |
|
222 | + } |
|
194 | 223 | $current = $this->inputTokens[$i]; |
195 | 224 | return true; |
196 | 225 | } |
@@ -205,7 +234,9 @@ discard block |
||
205 | 234 | * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference |
206 | 235 | */ |
207 | 236 | protected function current(&$i, &$current) { |
208 | - if ($i === null) $i = $this->inputIndex; |
|
237 | + if ($i === null) { |
|
238 | + $i = $this->inputIndex; |
|
239 | + } |
|
209 | 240 | $current = $this->inputTokens[$i]; |
210 | 241 | } |
211 | 242 |