@@ -8,28 +8,28 @@ |
||
8 | 8 | */ |
9 | 9 | class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter |
10 | 10 | { |
11 | - public $name = 'SafeIframe'; |
|
12 | - public $always_load = true; |
|
13 | - protected $regexp = NULL; |
|
14 | - // XXX: The not so good bit about how this is all setup now is we |
|
15 | - // can't check HTML.SafeIframe in the 'prepare' step: we have to |
|
16 | - // defer till the actual filtering. |
|
17 | - public function prepare($config) { |
|
18 | - $this->regexp = $config->get('URI.SafeIframeRegexp'); |
|
19 | - return true; |
|
20 | - } |
|
21 | - public function filter(&$uri, $config, $context) { |
|
22 | - // check if filter not applicable |
|
23 | - if (!$config->get('HTML.SafeIframe')) return true; |
|
24 | - // check if the filter should actually trigger |
|
25 | - if (!$context->get('EmbeddedURI', true)) return true; |
|
26 | - $token = $context->get('CurrentToken', true); |
|
27 | - if (!($token && $token->name == 'iframe')) return true; |
|
28 | - // check if we actually have some whitelists enabled |
|
29 | - if ($this->regexp === null) return false; |
|
30 | - // actually check the whitelists |
|
31 | - return preg_match($this->regexp, $uri->toString()); |
|
32 | - } |
|
11 | + public $name = 'SafeIframe'; |
|
12 | + public $always_load = true; |
|
13 | + protected $regexp = NULL; |
|
14 | + // XXX: The not so good bit about how this is all setup now is we |
|
15 | + // can't check HTML.SafeIframe in the 'prepare' step: we have to |
|
16 | + // defer till the actual filtering. |
|
17 | + public function prepare($config) { |
|
18 | + $this->regexp = $config->get('URI.SafeIframeRegexp'); |
|
19 | + return true; |
|
20 | + } |
|
21 | + public function filter(&$uri, $config, $context) { |
|
22 | + // check if filter not applicable |
|
23 | + if (!$config->get('HTML.SafeIframe')) return true; |
|
24 | + // check if the filter should actually trigger |
|
25 | + if (!$context->get('EmbeddedURI', true)) return true; |
|
26 | + $token = $context->get('CurrentToken', true); |
|
27 | + if (!($token && $token->name == 'iframe')) return true; |
|
28 | + // check if we actually have some whitelists enabled |
|
29 | + if ($this->regexp === null) return false; |
|
30 | + // actually check the whitelists |
|
31 | + return preg_match($this->regexp, $uri->toString()); |
|
32 | + } |
|
33 | 33 | } |
34 | 34 | |
35 | 35 | // vim: et sw=4 sts=4 |
@@ -20,13 +20,21 @@ |
||
20 | 20 | } |
21 | 21 | public function filter(&$uri, $config, $context) { |
22 | 22 | // check if filter not applicable |
23 | - if (!$config->get('HTML.SafeIframe')) return true; |
|
23 | + if (!$config->get('HTML.SafeIframe')) { |
|
24 | + return true; |
|
25 | + } |
|
24 | 26 | // check if the filter should actually trigger |
25 | - if (!$context->get('EmbeddedURI', true)) return true; |
|
27 | + if (!$context->get('EmbeddedURI', true)) { |
|
28 | + return true; |
|
29 | + } |
|
26 | 30 | $token = $context->get('CurrentToken', true); |
27 | - if (!($token && $token->name == 'iframe')) return true; |
|
31 | + if (!($token && $token->name == 'iframe')) { |
|
32 | + return true; |
|
33 | + } |
|
28 | 34 | // check if we actually have some whitelists enabled |
29 | - if ($this->regexp === null) return false; |
|
35 | + if ($this->regexp === null) { |
|
36 | + return false; |
|
37 | + } |
|
30 | 38 | // actually check the whitelists |
31 | 39 | return preg_match($this->regexp, $uri->toString()); |
32 | 40 | } |
@@ -7,63 +7,63 @@ |
||
7 | 7 | class HTMLPurifier_URIParser |
8 | 8 | { |
9 | 9 | |
10 | - /** |
|
11 | - * Instance of HTMLPurifier_PercentEncoder to do normalization with. |
|
12 | - */ |
|
13 | - protected $percentEncoder; |
|
10 | + /** |
|
11 | + * Instance of HTMLPurifier_PercentEncoder to do normalization with. |
|
12 | + */ |
|
13 | + protected $percentEncoder; |
|
14 | 14 | |
15 | - public function __construct() { |
|
16 | - $this->percentEncoder = new HTMLPurifier_PercentEncoder(); |
|
17 | - } |
|
15 | + public function __construct() { |
|
16 | + $this->percentEncoder = new HTMLPurifier_PercentEncoder(); |
|
17 | + } |
|
18 | 18 | |
19 | - /** |
|
20 | - * Parses a URI. |
|
21 | - * @param $uri string URI to parse |
|
22 | - * @return HTMLPurifier_URI representation of URI. This representation has |
|
23 | - * not been validated yet and may not conform to RFC. |
|
24 | - */ |
|
25 | - public function parse($uri) { |
|
19 | + /** |
|
20 | + * Parses a URI. |
|
21 | + * @param $uri string URI to parse |
|
22 | + * @return HTMLPurifier_URI representation of URI. This representation has |
|
23 | + * not been validated yet and may not conform to RFC. |
|
24 | + */ |
|
25 | + public function parse($uri) { |
|
26 | 26 | |
27 | - $uri = $this->percentEncoder->normalize($uri); |
|
27 | + $uri = $this->percentEncoder->normalize($uri); |
|
28 | 28 | |
29 | - // Regexp is as per Appendix B. |
|
30 | - // Note that ["<>] are an addition to the RFC's recommended |
|
31 | - // characters, because they represent external delimeters. |
|
32 | - $r_URI = '!'. |
|
33 | - '(([^:/?#"<>]+):)?'. // 2. Scheme |
|
34 | - '(//([^/?#"<>]*))?'. // 4. Authority |
|
35 | - '([^?#"<>]*)'. // 5. Path |
|
36 | - '(\?([^#"<>]*))?'. // 7. Query |
|
37 | - '(#([^"<>]*))?'. // 8. Fragment |
|
38 | - '!'; |
|
29 | + // Regexp is as per Appendix B. |
|
30 | + // Note that ["<>] are an addition to the RFC's recommended |
|
31 | + // characters, because they represent external delimiters. |
|
32 | + $r_URI = '!'. |
|
33 | + '(([^:/?#"<>]+):)?'. // 2. Scheme |
|
34 | + '(//([^/?#"<>]*))?'. // 4. Authority |
|
35 | + '([^?#"<>]*)'. // 5. Path |
|
36 | + '(\?([^#"<>]*))?'. // 7. Query |
|
37 | + '(#([^"<>]*))?'. // 8. Fragment |
|
38 | + '!'; |
|
39 | 39 | |
40 | - $matches = array(); |
|
41 | - $result = preg_match($r_URI, $uri, $matches); |
|
40 | + $matches = array(); |
|
41 | + $result = preg_match($r_URI, $uri, $matches); |
|
42 | 42 | |
43 | - if (!$result) return false; // *really* invalid URI |
|
43 | + if (!$result) return false; // *really* invalid URI |
|
44 | 44 | |
45 | - // seperate out parts |
|
46 | - $scheme = !empty($matches[1]) ? $matches[2] : null; |
|
47 | - $authority = !empty($matches[3]) ? $matches[4] : null; |
|
48 | - $path = $matches[5]; // always present, can be empty |
|
49 | - $query = !empty($matches[6]) ? $matches[7] : null; |
|
50 | - $fragment = !empty($matches[8]) ? $matches[9] : null; |
|
45 | + // seperate out parts |
|
46 | + $scheme = !empty($matches[1]) ? $matches[2] : null; |
|
47 | + $authority = !empty($matches[3]) ? $matches[4] : null; |
|
48 | + $path = $matches[5]; // always present, can be empty |
|
49 | + $query = !empty($matches[6]) ? $matches[7] : null; |
|
50 | + $fragment = !empty($matches[8]) ? $matches[9] : null; |
|
51 | 51 | |
52 | - // further parse authority |
|
53 | - if ($authority !== null) { |
|
54 | - $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; |
|
55 | - $matches = array(); |
|
56 | - preg_match($r_authority, $authority, $matches); |
|
57 | - $userinfo = !empty($matches[1]) ? $matches[2] : null; |
|
58 | - $host = !empty($matches[3]) ? $matches[3] : ''; |
|
59 | - $port = !empty($matches[4]) ? (int) $matches[5] : null; |
|
60 | - } else { |
|
61 | - $port = $host = $userinfo = null; |
|
62 | - } |
|
52 | + // further parse authority |
|
53 | + if ($authority !== null) { |
|
54 | + $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; |
|
55 | + $matches = array(); |
|
56 | + preg_match($r_authority, $authority, $matches); |
|
57 | + $userinfo = !empty($matches[1]) ? $matches[2] : null; |
|
58 | + $host = !empty($matches[3]) ? $matches[3] : ''; |
|
59 | + $port = !empty($matches[4]) ? (int) $matches[5] : null; |
|
60 | + } else { |
|
61 | + $port = $host = $userinfo = null; |
|
62 | + } |
|
63 | 63 | |
64 | - return new HTMLPurifier_URI( |
|
65 | - $scheme, $userinfo, $host, $port, $path, $query, $fragment); |
|
66 | - } |
|
64 | + return new HTMLPurifier_URI( |
|
65 | + $scheme, $userinfo, $host, $port, $path, $query, $fragment); |
|
66 | + } |
|
67 | 67 | |
68 | 68 | } |
69 | 69 |
@@ -40,7 +40,10 @@ |
||
40 | 40 | $matches = array(); |
41 | 41 | $result = preg_match($r_URI, $uri, $matches); |
42 | 42 | |
43 | - if (!$result) return false; // *really* invalid URI |
|
43 | + if (!$result) { |
|
44 | + return false; |
|
45 | + } |
|
46 | + // *really* invalid URI |
|
44 | 47 | |
45 | 48 | // separate out parts |
46 | 49 | $scheme = !empty($matches[1]) ? $matches[2] : null; |
@@ -30,11 +30,11 @@ |
||
30 | 30 | // Note that ["<>] are an addition to the RFC's recommended |
31 | 31 | // characters, because they represent external delimiters. |
32 | 32 | $r_URI = '!'. |
33 | - '(([^:/?#"<>]+):)?'. // 2. Scheme |
|
34 | - '(//([^/?#"<>]*))?'. // 4. Authority |
|
35 | - '([^?#"<>]*)'. // 5. Path |
|
36 | - '(\?([^#"<>]*))?'. // 7. Query |
|
37 | - '(#([^"<>]*))?'. // 8. Fragment |
|
33 | + '(([^:/?#"<>]+):)?'.// 2. Scheme |
|
34 | + '(//([^/?#"<>]*))?'.// 4. Authority |
|
35 | + '([^?#"<>]*)'.// 5. Path |
|
36 | + '(\?([^#"<>]*))?'.// 7. Query |
|
37 | + '(#([^"<>]*))?'.// 8. Fragment |
|
38 | 38 | '!'; |
39 | 39 | |
40 | 40 | $matches = array(); |
@@ -6,89 +6,89 @@ |
||
6 | 6 | abstract class HTMLPurifier_URIScheme |
7 | 7 | { |
8 | 8 | |
9 | - /** |
|
10 | - * Scheme's default port (integer). If an explicit port number is |
|
11 | - * specified that coincides with the default port, it will be |
|
12 | - * elided. |
|
13 | - */ |
|
14 | - public $default_port = null; |
|
9 | + /** |
|
10 | + * Scheme's default port (integer). If an explicit port number is |
|
11 | + * specified that coincides with the default port, it will be |
|
12 | + * elided. |
|
13 | + */ |
|
14 | + public $default_port = null; |
|
15 | 15 | |
16 | - /** |
|
17 | - * Whether or not URIs of this schem are locatable by a browser |
|
18 | - * http and ftp are accessible, while mailto and news are not. |
|
19 | - */ |
|
20 | - public $browsable = false; |
|
16 | + /** |
|
17 | + * Whether or not URIs of this scheme are locatable by a browser |
|
18 | + * http and ftp are accessible, while mailto and news are not. |
|
19 | + */ |
|
20 | + public $browsable = false; |
|
21 | 21 | |
22 | - /** |
|
23 | - * Whether or not data transmitted over this scheme is encrypted. |
|
24 | - * https is secure, http is not. |
|
25 | - */ |
|
26 | - public $secure = false; |
|
22 | + /** |
|
23 | + * Whether or not data transmitted over this scheme is encrypted. |
|
24 | + * https is secure, http is not. |
|
25 | + */ |
|
26 | + public $secure = false; |
|
27 | 27 | |
28 | - /** |
|
29 | - * Whether or not the URI always uses <hier_part>, resolves edge cases |
|
30 | - * with making relative URIs absolute |
|
31 | - */ |
|
32 | - public $hierarchical = false; |
|
28 | + /** |
|
29 | + * Whether or not the URI always uses <hier_part>, resolves edge cases |
|
30 | + * with making relative URIs absolute |
|
31 | + */ |
|
32 | + public $hierarchical = false; |
|
33 | 33 | |
34 | - /** |
|
35 | - * Whether or not the URI may omit a hostname when the scheme is |
|
36 | - * explicitly specified, ala file:///path/to/file. As of writing, |
|
37 | - * 'file' is the only scheme that browsers support his properly. |
|
38 | - */ |
|
39 | - public $may_omit_host = false; |
|
34 | + /** |
|
35 | + * Whether or not the URI may omit a hostname when the scheme is |
|
36 | + * explicitly specified, ala file:///path/to/file. As of writing, |
|
37 | + * 'file' is the only scheme that browsers support this properly. |
|
38 | + */ |
|
39 | + public $may_omit_host = false; |
|
40 | 40 | |
41 | - /** |
|
42 | - * Validates the components of a URI for a specific scheme. |
|
43 | - * @param $uri Reference to a HTMLPurifier_URI object |
|
44 | - * @param $config HTMLPurifier_Config object |
|
45 | - * @param $context HTMLPurifier_Context object |
|
46 | - * @return Bool success or failure |
|
47 | - */ |
|
48 | - public abstract function doValidate(&$uri, $config, $context); |
|
41 | + /** |
|
42 | + * Validates the components of a URI for a specific scheme. |
|
43 | + * @param $uri Reference to a HTMLPurifier_URI object |
|
44 | + * @param $config HTMLPurifier_Config object |
|
45 | + * @param $context HTMLPurifier_Context object |
|
46 | + * @return Bool success or failure |
|
47 | + */ |
|
48 | + public abstract function doValidate(&$uri, $config, $context); |
|
49 | 49 | |
50 | - /** |
|
51 | - * Public interface for validating components of a URI. Performs a |
|
52 | - * bunch of default actions. Don't overload this method. |
|
53 | - * @param $uri Reference to a HTMLPurifier_URI object |
|
54 | - * @param $config HTMLPurifier_Config object |
|
55 | - * @param $context HTMLPurifier_Context object |
|
56 | - * @return Bool success or failure |
|
57 | - */ |
|
58 | - public function validate(&$uri, $config, $context) { |
|
59 | - if ($this->default_port == $uri->port) $uri->port = null; |
|
60 | - // kludge: browsers do funny things when the scheme but not the |
|
61 | - // authority is set |
|
62 | - if (!$this->may_omit_host && |
|
63 | - // if the scheme is present, a missing host is always in error |
|
64 | - (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) || |
|
65 | - // if the scheme is not present, a *blank* host is in error, |
|
66 | - // since this translates into '///path' which most browsers |
|
67 | - // interpret as being 'http://path'. |
|
68 | - (is_null($uri->scheme) && $uri->host === '') |
|
69 | - ) { |
|
70 | - do { |
|
71 | - if (is_null($uri->scheme)) { |
|
72 | - if (substr($uri->path, 0, 2) != '//') { |
|
73 | - $uri->host = null; |
|
74 | - break; |
|
75 | - } |
|
76 | - // URI is '////path', so we cannot nullify the |
|
77 | - // host to preserve semantics. Try expanding the |
|
78 | - // hostname instead (fall through) |
|
79 | - } |
|
80 | - // first see if we can manually insert a hostname |
|
81 | - $host = $config->get('URI.Host'); |
|
82 | - if (!is_null($host)) { |
|
83 | - $uri->host = $host; |
|
84 | - } else { |
|
85 | - // we can't do anything sensible, reject the URL. |
|
86 | - return false; |
|
87 | - } |
|
88 | - } while (false); |
|
89 | - } |
|
90 | - return $this->doValidate($uri, $config, $context); |
|
91 | - } |
|
50 | + /** |
|
51 | + * Public interface for validating components of a URI. Performs a |
|
52 | + * bunch of default actions. Don't overload this method. |
|
53 | + * @param $uri Reference to a HTMLPurifier_URI object |
|
54 | + * @param $config HTMLPurifier_Config object |
|
55 | + * @param $context HTMLPurifier_Context object |
|
56 | + * @return Bool success or failure |
|
57 | + */ |
|
58 | + public function validate(&$uri, $config, $context) { |
|
59 | + if ($this->default_port == $uri->port) $uri->port = null; |
|
60 | + // kludge: browsers do funny things when the scheme but not the |
|
61 | + // authority is set |
|
62 | + if (!$this->may_omit_host && |
|
63 | + // if the scheme is present, a missing host is always in error |
|
64 | + (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) || |
|
65 | + // if the scheme is not present, a *blank* host is in error, |
|
66 | + // since this translates into '///path' which most browsers |
|
67 | + // interpret as being 'http://path'. |
|
68 | + (is_null($uri->scheme) && $uri->host === '') |
|
69 | + ) { |
|
70 | + do { |
|
71 | + if (is_null($uri->scheme)) { |
|
72 | + if (substr($uri->path, 0, 2) != '//') { |
|
73 | + $uri->host = null; |
|
74 | + break; |
|
75 | + } |
|
76 | + // URI is '////path', so we cannot nullify the |
|
77 | + // host to preserve semantics. Try expanding the |
|
78 | + // hostname instead (fall through) |
|
79 | + } |
|
80 | + // first see if we can manually insert a hostname |
|
81 | + $host = $config->get('URI.Host'); |
|
82 | + if (!is_null($host)) { |
|
83 | + $uri->host = $host; |
|
84 | + } else { |
|
85 | + // we can't do anything sensible, reject the URL. |
|
86 | + return false; |
|
87 | + } |
|
88 | + } while (false); |
|
89 | + } |
|
90 | + return $this->doValidate($uri, $config, $context); |
|
91 | + } |
|
92 | 92 | |
93 | 93 | } |
94 | 94 |
@@ -56,7 +56,9 @@ |
||
56 | 56 | * @return Bool success or failure |
57 | 57 | */ |
58 | 58 | public function validate(&$uri, $config, $context) { |
59 | - if ($this->default_port == $uri->port) $uri->port = null; |
|
59 | + if ($this->default_port == $uri->port) { |
|
60 | + $uri->port = null; |
|
61 | + } |
|
60 | 62 | // kludge: browsers do funny things when the scheme but not the |
61 | 63 | // authority is set |
62 | 64 | if (!$this->may_omit_host && |
@@ -5,92 +5,92 @@ |
||
5 | 5 | */ |
6 | 6 | class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme { |
7 | 7 | |
8 | - public $browsable = true; |
|
9 | - public $allowed_types = array( |
|
10 | - // you better write validation code for other types if you |
|
11 | - // decide to allow them |
|
12 | - 'image/jpeg' => true, |
|
13 | - 'image/gif' => true, |
|
14 | - 'image/png' => true, |
|
15 | - ); |
|
16 | - // this is actually irrelevant since we only write out the path |
|
17 | - // component |
|
18 | - public $may_omit_host = true; |
|
8 | + public $browsable = true; |
|
9 | + public $allowed_types = array( |
|
10 | + // you better write validation code for other types if you |
|
11 | + // decide to allow them |
|
12 | + 'image/jpeg' => true, |
|
13 | + 'image/gif' => true, |
|
14 | + 'image/png' => true, |
|
15 | + ); |
|
16 | + // this is actually irrelevant since we only write out the path |
|
17 | + // component |
|
18 | + public $may_omit_host = true; |
|
19 | 19 | |
20 | - public function doValidate(&$uri, $config, $context) { |
|
21 | - $result = explode(',', $uri->path, 2); |
|
22 | - $is_base64 = false; |
|
23 | - $charset = null; |
|
24 | - $content_type = null; |
|
25 | - if (count($result) == 2) { |
|
26 | - list($metadata, $data) = $result; |
|
27 | - // do some legwork on the metadata |
|
28 | - $metas = explode(';', $metadata); |
|
29 | - while(!empty($metas)) { |
|
30 | - $cur = array_shift($metas); |
|
31 | - if ($cur == 'base64') { |
|
32 | - $is_base64 = true; |
|
33 | - break; |
|
34 | - } |
|
35 | - if (substr($cur, 0, 8) == 'charset=') { |
|
36 | - // doesn't match if there are arbitrary spaces, but |
|
37 | - // whatever dude |
|
38 | - if ($charset !== null) continue; // garbage |
|
39 | - $charset = substr($cur, 8); // not used |
|
40 | - } else { |
|
41 | - if ($content_type !== null) continue; // garbage |
|
42 | - $content_type = $cur; |
|
43 | - } |
|
44 | - } |
|
45 | - } else { |
|
46 | - $data = $result[0]; |
|
47 | - } |
|
48 | - if ($content_type !== null && empty($this->allowed_types[$content_type])) { |
|
49 | - return false; |
|
50 | - } |
|
51 | - if ($charset !== null) { |
|
52 | - // error; we don't allow plaintext stuff |
|
53 | - $charset = null; |
|
54 | - } |
|
55 | - $data = rawurldecode($data); |
|
56 | - if ($is_base64) { |
|
57 | - $raw_data = base64_decode($data); |
|
58 | - } else { |
|
59 | - $raw_data = $data; |
|
60 | - } |
|
61 | - // XXX probably want to refactor this into a general mechanism |
|
62 | - // for filtering arbitrary content types |
|
63 | - $file = tempnam("/tmp", ""); |
|
64 | - file_put_contents($file, $raw_data, LOCK_EX); |
|
65 | - if (function_exists('exif_imagetype')) { |
|
66 | - $image_code = exif_imagetype($file); |
|
67 | - } elseif (function_exists('getimagesize')) { |
|
68 | - set_error_handler(array($this, 'muteErrorHandler')); |
|
69 | - $info = getimagesize($file); |
|
70 | - restore_error_handler(); |
|
71 | - if ($info == false) return false; |
|
72 | - $image_code = $info[2]; |
|
73 | - } else { |
|
74 | - trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR); |
|
75 | - } |
|
76 | - $real_content_type = image_type_to_mime_type($image_code); |
|
77 | - if ($real_content_type != $content_type) { |
|
78 | - // we're nice guys; if the content type is something else we |
|
79 | - // support, change it over |
|
80 | - if (empty($this->allowed_types[$real_content_type])) return false; |
|
81 | - $content_type = $real_content_type; |
|
82 | - } |
|
83 | - // ok, it's kosher, rewrite what we need |
|
84 | - $uri->userinfo = null; |
|
85 | - $uri->host = null; |
|
86 | - $uri->port = null; |
|
87 | - $uri->fragment = null; |
|
88 | - $uri->query = null; |
|
89 | - $uri->path = "$content_type;base64," . base64_encode($raw_data); |
|
90 | - return true; |
|
91 | - } |
|
20 | + public function doValidate(&$uri, $config, $context) { |
|
21 | + $result = explode(',', $uri->path, 2); |
|
22 | + $is_base64 = false; |
|
23 | + $charset = null; |
|
24 | + $content_type = null; |
|
25 | + if (count($result) == 2) { |
|
26 | + list($metadata, $data) = $result; |
|
27 | + // do some legwork on the metadata |
|
28 | + $metas = explode(';', $metadata); |
|
29 | + while(!empty($metas)) { |
|
30 | + $cur = array_shift($metas); |
|
31 | + if ($cur == 'base64') { |
|
32 | + $is_base64 = true; |
|
33 | + break; |
|
34 | + } |
|
35 | + if (substr($cur, 0, 8) == 'charset=') { |
|
36 | + // doesn't match if there are arbitrary spaces, but |
|
37 | + // whatever dude |
|
38 | + if ($charset !== null) continue; // garbage |
|
39 | + $charset = substr($cur, 8); // not used |
|
40 | + } else { |
|
41 | + if ($content_type !== null) continue; // garbage |
|
42 | + $content_type = $cur; |
|
43 | + } |
|
44 | + } |
|
45 | + } else { |
|
46 | + $data = $result[0]; |
|
47 | + } |
|
48 | + if ($content_type !== null && empty($this->allowed_types[$content_type])) { |
|
49 | + return false; |
|
50 | + } |
|
51 | + if ($charset !== null) { |
|
52 | + // error; we don't allow plaintext stuff |
|
53 | + $charset = null; |
|
54 | + } |
|
55 | + $data = rawurldecode($data); |
|
56 | + if ($is_base64) { |
|
57 | + $raw_data = base64_decode($data); |
|
58 | + } else { |
|
59 | + $raw_data = $data; |
|
60 | + } |
|
61 | + // XXX probably want to refactor this into a general mechanism |
|
62 | + // for filtering arbitrary content types |
|
63 | + $file = tempnam("/tmp", ""); |
|
64 | + file_put_contents($file, $raw_data, LOCK_EX); |
|
65 | + if (function_exists('exif_imagetype')) { |
|
66 | + $image_code = exif_imagetype($file); |
|
67 | + } elseif (function_exists('getimagesize')) { |
|
68 | + set_error_handler(array($this, 'muteErrorHandler')); |
|
69 | + $info = getimagesize($file); |
|
70 | + restore_error_handler(); |
|
71 | + if ($info == false) return false; |
|
72 | + $image_code = $info[2]; |
|
73 | + } else { |
|
74 | + trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR); |
|
75 | + } |
|
76 | + $real_content_type = image_type_to_mime_type($image_code); |
|
77 | + if ($real_content_type != $content_type) { |
|
78 | + // we're nice guys; if the content type is something else we |
|
79 | + // support, change it over |
|
80 | + if (empty($this->allowed_types[$real_content_type])) return false; |
|
81 | + $content_type = $real_content_type; |
|
82 | + } |
|
83 | + // ok, it's kosher, rewrite what we need |
|
84 | + $uri->userinfo = null; |
|
85 | + $uri->host = null; |
|
86 | + $uri->port = null; |
|
87 | + $uri->fragment = null; |
|
88 | + $uri->query = null; |
|
89 | + $uri->path = "$content_type;base64," . base64_encode($raw_data); |
|
90 | + return true; |
|
91 | + } |
|
92 | 92 | |
93 | - public function muteErrorHandler($errno, $errstr) {} |
|
93 | + public function muteErrorHandler($errno, $errstr) {} |
|
94 | 94 | |
95 | 95 | } |
96 | 96 |
@@ -35,10 +35,16 @@ discard block |
||
35 | 35 | if (substr($cur, 0, 8) == 'charset=') { |
36 | 36 | // doesn't match if there are arbitrary spaces, but |
37 | 37 | // whatever dude |
38 | - if ($charset !== null) continue; // garbage |
|
38 | + if ($charset !== null) { |
|
39 | + continue; |
|
40 | + } |
|
41 | + // garbage |
|
39 | 42 | $charset = substr($cur, 8); // not used |
40 | 43 | } else { |
41 | - if ($content_type !== null) continue; // garbage |
|
44 | + if ($content_type !== null) { |
|
45 | + continue; |
|
46 | + } |
|
47 | + // garbage |
|
42 | 48 | $content_type = $cur; |
43 | 49 | } |
44 | 50 | } |
@@ -68,7 +74,9 @@ discard block |
||
68 | 74 | set_error_handler(array($this, 'muteErrorHandler')); |
69 | 75 | $info = getimagesize($file); |
70 | 76 | restore_error_handler(); |
71 | - if ($info == false) return false; |
|
77 | + if ($info == false) { |
|
78 | + return false; |
|
79 | + } |
|
72 | 80 | $image_code = $info[2]; |
73 | 81 | } else { |
74 | 82 | trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR); |
@@ -77,7 +85,9 @@ discard block |
||
77 | 85 | if ($real_content_type != $content_type) { |
78 | 86 | // we're nice guys; if the content type is something else we |
79 | 87 | // support, change it over |
80 | - if (empty($this->allowed_types[$real_content_type])) return false; |
|
88 | + if (empty($this->allowed_types[$real_content_type])) { |
|
89 | + return false; |
|
90 | + } |
|
81 | 91 | $content_type = $real_content_type; |
82 | 92 | } |
83 | 93 | // ok, it's kosher, rewrite what we need |
@@ -26,7 +26,7 @@ discard block |
||
26 | 26 | list($metadata, $data) = $result; |
27 | 27 | // do some legwork on the metadata |
28 | 28 | $metas = explode(';', $metadata); |
29 | - while(!empty($metas)) { |
|
29 | + while (!empty($metas)) { |
|
30 | 30 | $cur = array_shift($metas); |
31 | 31 | if ($cur == 'base64') { |
32 | 32 | $is_base64 = true; |
@@ -86,7 +86,7 @@ discard block |
||
86 | 86 | $uri->port = null; |
87 | 87 | $uri->fragment = null; |
88 | 88 | $uri->query = null; |
89 | - $uri->path = "$content_type;base64," . base64_encode($raw_data); |
|
89 | + $uri->path = "$content_type;base64,".base64_encode($raw_data); |
|
90 | 90 | return true; |
91 | 91 | } |
92 | 92 |
@@ -5,27 +5,27 @@ |
||
5 | 5 | */ |
6 | 6 | class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme { |
7 | 7 | |
8 | - // Generally file:// URLs are not accessible from most |
|
9 | - // machines, so placing them as an img src is incorrect. |
|
10 | - public $browsable = false; |
|
8 | + // Generally file:// URLs are not accessible from most |
|
9 | + // machines, so placing them as an img src is incorrect. |
|
10 | + public $browsable = false; |
|
11 | 11 | |
12 | - // Basically the *only* URI scheme for which this is true, since |
|
13 | - // accessing files on the local machine is very common. In fact, |
|
14 | - // browsers on some operating systems don't understand the |
|
15 | - // authority, though I hear it is used on Windows to refer to |
|
16 | - // network shares. |
|
17 | - public $may_omit_host = true; |
|
12 | + // Basically the *only* URI scheme for which this is true, since |
|
13 | + // accessing files on the local machine is very common. In fact, |
|
14 | + // browsers on some operating systems don't understand the |
|
15 | + // authority, though I hear it is used on Windows to refer to |
|
16 | + // network shares. |
|
17 | + public $may_omit_host = true; |
|
18 | 18 | |
19 | - public function doValidate(&$uri, $config, $context) { |
|
20 | - // Authentication method is not supported |
|
21 | - $uri->userinfo = null; |
|
22 | - // file:// makes no provisions for accessing the resource |
|
23 | - $uri->port = null; |
|
24 | - // While it seems to work on Firefox, the querystring has |
|
25 | - // no possible effect and is thus stripped. |
|
26 | - $uri->query = null; |
|
27 | - return true; |
|
28 | - } |
|
19 | + public function doValidate(&$uri, $config, $context) { |
|
20 | + // Authentication method is not supported |
|
21 | + $uri->userinfo = null; |
|
22 | + // file:// makes no provisions for accessing the resource |
|
23 | + $uri->port = null; |
|
24 | + // While it seems to work on Firefox, the querystring has |
|
25 | + // no possible effect and is thus stripped. |
|
26 | + $uri->query = null; |
|
27 | + return true; |
|
28 | + } |
|
29 | 29 | |
30 | 30 | } |
31 | 31 |
@@ -5,37 +5,37 @@ |
||
5 | 5 | */ |
6 | 6 | class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme { |
7 | 7 | |
8 | - public $default_port = 21; |
|
9 | - public $browsable = true; // usually |
|
10 | - public $hierarchical = true; |
|
8 | + public $default_port = 21; |
|
9 | + public $browsable = true; // usually |
|
10 | + public $hierarchical = true; |
|
11 | 11 | |
12 | - public function doValidate(&$uri, $config, $context) { |
|
13 | - $uri->query = null; |
|
12 | + public function doValidate(&$uri, $config, $context) { |
|
13 | + $uri->query = null; |
|
14 | 14 | |
15 | - // typecode check |
|
16 | - $semicolon_pos = strrpos($uri->path, ';'); // reverse |
|
17 | - if ($semicolon_pos !== false) { |
|
18 | - $type = substr($uri->path, $semicolon_pos + 1); // no semicolon |
|
19 | - $uri->path = substr($uri->path, 0, $semicolon_pos); |
|
20 | - $type_ret = ''; |
|
21 | - if (strpos($type, '=') !== false) { |
|
22 | - // figure out whether or not the declaration is correct |
|
23 | - list($key, $typecode) = explode('=', $type, 2); |
|
24 | - if ($key !== 'type') { |
|
25 | - // invalid key, tack it back on encoded |
|
26 | - $uri->path .= '%3B' . $type; |
|
27 | - } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { |
|
28 | - $type_ret = ";type=$typecode"; |
|
29 | - } |
|
30 | - } else { |
|
31 | - $uri->path .= '%3B' . $type; |
|
32 | - } |
|
33 | - $uri->path = str_replace(';', '%3B', $uri->path); |
|
34 | - $uri->path .= $type_ret; |
|
35 | - } |
|
15 | + // typecode check |
|
16 | + $semicolon_pos = strrpos($uri->path, ';'); // reverse |
|
17 | + if ($semicolon_pos !== false) { |
|
18 | + $type = substr($uri->path, $semicolon_pos + 1); // no semicolon |
|
19 | + $uri->path = substr($uri->path, 0, $semicolon_pos); |
|
20 | + $type_ret = ''; |
|
21 | + if (strpos($type, '=') !== false) { |
|
22 | + // figure out whether or not the declaration is correct |
|
23 | + list($key, $typecode) = explode('=', $type, 2); |
|
24 | + if ($key !== 'type') { |
|
25 | + // invalid key, tack it back on encoded |
|
26 | + $uri->path .= '%3B' . $type; |
|
27 | + } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { |
|
28 | + $type_ret = ";type=$typecode"; |
|
29 | + } |
|
30 | + } else { |
|
31 | + $uri->path .= '%3B' . $type; |
|
32 | + } |
|
33 | + $uri->path = str_replace(';', '%3B', $uri->path); |
|
34 | + $uri->path .= $type_ret; |
|
35 | + } |
|
36 | 36 | |
37 | - return true; |
|
38 | - } |
|
37 | + return true; |
|
38 | + } |
|
39 | 39 | |
40 | 40 | } |
41 | 41 |
@@ -23,12 +23,12 @@ |
||
23 | 23 | list($key, $typecode) = explode('=', $type, 2); |
24 | 24 | if ($key !== 'type') { |
25 | 25 | // invalid key, tack it back on encoded |
26 | - $uri->path .= '%3B' . $type; |
|
26 | + $uri->path .= '%3B'.$type; |
|
27 | 27 | } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { |
28 | 28 | $type_ret = ";type=$typecode"; |
29 | 29 | } |
30 | 30 | } else { |
31 | - $uri->path .= '%3B' . $type; |
|
31 | + $uri->path .= '%3B'.$type; |
|
32 | 32 | } |
33 | 33 | $uri->path = str_replace(';', '%3B', $uri->path); |
34 | 34 | $uri->path .= $type_ret; |
@@ -5,14 +5,14 @@ |
||
5 | 5 | */ |
6 | 6 | class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme { |
7 | 7 | |
8 | - public $default_port = 80; |
|
9 | - public $browsable = true; |
|
10 | - public $hierarchical = true; |
|
8 | + public $default_port = 80; |
|
9 | + public $browsable = true; |
|
10 | + public $hierarchical = true; |
|
11 | 11 | |
12 | - public function doValidate(&$uri, $config, $context) { |
|
13 | - $uri->userinfo = null; |
|
14 | - return true; |
|
15 | - } |
|
12 | + public function doValidate(&$uri, $config, $context) { |
|
13 | + $uri->userinfo = null; |
|
14 | + return true; |
|
15 | + } |
|
16 | 16 | |
17 | 17 | } |
18 | 18 |
@@ -5,8 +5,8 @@ |
||
5 | 5 | */ |
6 | 6 | class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http { |
7 | 7 | |
8 | - public $default_port = 443; |
|
9 | - public $secure = true; |
|
8 | + public $default_port = 443; |
|
9 | + public $secure = true; |
|
10 | 10 | |
11 | 11 | } |
12 | 12 |
@@ -11,16 +11,16 @@ |
||
11 | 11 | |
12 | 12 | class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme { |
13 | 13 | |
14 | - public $browsable = false; |
|
15 | - public $may_omit_host = true; |
|
14 | + public $browsable = false; |
|
15 | + public $may_omit_host = true; |
|
16 | 16 | |
17 | - public function doValidate(&$uri, $config, $context) { |
|
18 | - $uri->userinfo = null; |
|
19 | - $uri->host = null; |
|
20 | - $uri->port = null; |
|
21 | - // we need to validate path against RFC 2368's addr-spec |
|
22 | - return true; |
|
23 | - } |
|
17 | + public function doValidate(&$uri, $config, $context) { |
|
18 | + $uri->userinfo = null; |
|
19 | + $uri->host = null; |
|
20 | + $uri->port = null; |
|
21 | + // we need to validate path against RFC 2368's addr-spec |
|
22 | + return true; |
|
23 | + } |
|
24 | 24 | |
25 | 25 | } |
26 | 26 |