Total Complexity | 45 |
Total Lines | 302 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like HTMLPurifier_URI often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HTMLPurifier_URI, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
11 | class HTMLPurifier_URI |
||
12 | { |
||
13 | /** |
||
14 | * @type string |
||
15 | */ |
||
16 | public $scheme; |
||
17 | |||
18 | /** |
||
19 | * @type string |
||
20 | */ |
||
21 | public $userinfo; |
||
22 | |||
23 | /** |
||
24 | * @type string |
||
25 | */ |
||
26 | public $host; |
||
27 | |||
28 | /** |
||
29 | * @type int |
||
30 | */ |
||
31 | public $port; |
||
32 | |||
33 | /** |
||
34 | * @type string |
||
35 | */ |
||
36 | public $path; |
||
37 | |||
38 | /** |
||
39 | * @type string |
||
40 | */ |
||
41 | public $query; |
||
42 | |||
43 | /** |
||
44 | * @type string |
||
45 | */ |
||
46 | public $fragment; |
||
47 | |||
/**
 * Builds a URI from its already-split components.
 *
 * Any component may be passed as null to mark it as absent.
 *
 * @param string $scheme   Scheme name; stored lower-cased
 * @param string $userinfo User information part of the authority
 * @param string $host     Host part of the authority
 * @param int $port        Port; any non-null value is cast to int
 * @param string $path     Path component
 * @param string $query    Query component (without the leading '?')
 * @param string $fragment Fragment component (without the leading '#')
 * @note Automatically normalizes scheme and port
 */
public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
{
    // Only pay for strtolower() when the scheme is present and is not
    // already made up purely of lower-case letters.
    if (!is_null($scheme) && !ctype_lower($scheme)) {
        $scheme = strtolower($scheme);
    }

    // A missing port stays null; anything else is forced to an integer.
    if (!is_null($port)) {
        $port = (int)$port;
    }

    $this->scheme = $scheme;
    $this->userinfo = $userinfo;
    $this->host = $host;
    $this->port = $port;
    $this->path = $path;
    $this->query = $query;
    $this->fragment = $fragment;
}
||
68 | |||
69 | /** |
||
70 | * Retrieves a scheme object corresponding to the URI's scheme/default |
||
71 | * @param HTMLPurifier_Config $config |
||
72 | * @param HTMLPurifier_Context $context |
||
73 | * @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI |
||
74 | */ |
||
75 | public function getSchemeObj($config, $context) |
||
99 | } |
||
100 | |||
/**
 * Generic validation method applicable for all schemes. May modify
 * this URI in order to get it into a compliant form.
 *
 * Components are processed in this order: host, scheme, userinfo, port,
 * path, then query and fragment. A host rejected by
 * HTMLPurifier_AttrDef_URI_Host and a port outside 1-65535 are reset to
 * null instead of failing the whole URI. The remaining components are run
 * through HTMLPurifier_PercentEncoder instances built from the RFC 3986
 * character sets defined at the top of the method.
 *
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool True if validation/filtering succeeds, false if failure
 *              (this generic implementation always returns true)
 */
public function validate($config, $context)
{
    // ABNF definitions from RFC 3986
    $chars_sub_delims = '!$&\'()*+,;=';
    $chars_pchar = $chars_sub_delims . ':@';

    // validate host
    if (!is_null($this->host)) {
        $host_def = new HTMLPurifier_AttrDef_URI_Host();
        $this->host = $host_def->validate($this->host, $config, $context);
        if ($this->host === false) {
            // an invalid host is dropped rather than rejecting the URI
            $this->host = null;
        }
    }

    // validate scheme
    // NOTE: It's not appropriate to check whether or not this
    // scheme is in our registry, since a URIFilter may convert a
    // URI that we don't allow into one we do. So instead, we just
    // check if the scheme can be dropped because there is no host
    // and it is our default scheme.
    //
    // The parentheses are deliberate: && binds tighter than ||, so
    // without them an empty-string host would enter this branch even
    // when there is no scheme to drop.
    if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
        // support for relative paths is pretty abysmal when the
        // scheme is present, so axe it when possible
        $def = $config->getDefinition('URI');
        if ($def->defaultScheme === $this->scheme) {
            $this->scheme = null;
        }
    }

    // validate username
    if (!is_null($this->userinfo)) {
        $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
        $this->userinfo = $encoder->encode($this->userinfo);
    }

    // validate port
    if (!is_null($this->port)) {
        if ($this->port < 1 || $this->port > 65535) {
            $this->port = null;
        }
    }

    // validate path
    $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
    if (!is_null($this->host)) { // this catches $this->host === ''
        // path-abempty (hier and relative)
        // http://www.example.com/my/path
        // //www.example.com/my/path (looks odd, but works, and
        //                            recognized by most browsers)
        // (this set is valid or invalid on a scheme by scheme
        // basis, so we'll deal with it later)
        // file:///my/path
        // ///my/path
        $this->path = $segments_encoder->encode($this->path);
    } elseif ($this->path !== '') {
        if ($this->path[0] === '/') {
            // path-absolute (hier and relative)
            // http:/my/path
            // /my/path
            if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                // This could happen if the host gets stripped out:
                // http://my/path
                // //my/path
                // Without a host, a leading '//' would be re-read as
                // the start of an authority, so the path is discarded.
                $this->path = '';
            } else {
                $this->path = $segments_encoder->encode($this->path);
            }
        } elseif (!is_null($this->scheme)) {
            // path-rootless (hier)
            // http:my/path
            // Short circuit evaluation means we don't need to check nz
            $this->path = $segments_encoder->encode($this->path);
        } else {
            // path-noscheme (relative)
            // my/path
            // (once again, not checking nz)
            // The first segment gets its own encoder whose character
            // list omits ':'; the rest of the path uses the normal one.
            $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
            $c = strpos($this->path, '/');
            if ($c !== false) {
                $this->path =
                    $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                    $segments_encoder->encode(substr($this->path, $c));
            } else {
                $this->path = $segment_nc_encoder->encode($this->path);
            }
        }
    } else {
        // path-empty (hier and relative)
        $this->path = ''; // just to be safe
    }

    // qf = query and fragment
    $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

    if (!is_null($this->query)) {
        $this->query = $qf_encoder->encode($this->query);
    }

    if (!is_null($this->fragment)) {
        $this->fragment = $qf_encoder->encode($this->fragment);
    }
    return true;
}
||
214 | |||
/**
 * Serialises the URI components back into a single string.
 *
 * Components that are null are left out together with their delimiter;
 * the path is always appended as-is.
 *
 * @return string URI appropriate for output
 */
public function toString()
{
    $uri = '';

    if ($this->scheme !== null) {
        $uri .= $this->scheme . ':';
    }

    // A null host means "no authority" (http:foo-bar), while an empty
    // string host still emits the '//' marker (http:///foo-bar). The two
    // render differently, so only null suppresses the authority.
    //
    // How browsers parse the result depends on the scheme in use
    // (file:///foo is handled very differently from http:///foo), so any
    // scheme-specific clean-up is left to the scheme objects.
    if ($this->host !== null) {
        $uri .= '//';
        if ($this->userinfo !== null) {
            $uri .= $this->userinfo . '@';
        }
        $uri .= $this->host;
        if ($this->port !== null) {
            $uri .= ':' . $this->port;
        }
    }

    $uri .= $this->path;

    if ($this->query !== null) {
        $uri .= '?' . $this->query;
    }
    if ($this->fragment !== null) {
        $uri .= '#' . $this->fragment;
    }

    return $uri;
}
||
260 | |||
261 | /** |
||
262 | * Returns true if this URL might be considered a 'local' URL given |
||
263 | * the current context. This is true when the host is null, or |
||
264 | * when it matches the host supplied to the configuration. |
||
265 | * |
||
266 | * Note that this does not do any scheme checking, so it is mostly |
||
267 | * only appropriate for metadata that doesn't care about protocol |
||
268 | * security. isBenign is probably what you actually want. |
||
269 | * @param HTMLPurifier_Config $config |
||
270 | * @param HTMLPurifier_Context $context |
||
271 | * @return bool |
||
272 | */ |
||
273 | public function isLocal($config, $context) |
||
283 | } |
||
284 | |||
285 | /** |
||
286 | * Returns true if this URL should be considered a 'benign' URL, |
||
287 | * that is: |
||
288 | * |
||
289 | * - It is a local URL (isLocal), and |
||
290 | * - It has an equal or better level of security |
||
291 | * @param HTMLPurifier_Config $config |
||
292 | * @param HTMLPurifier_Context $context |
||
293 | * @return bool |
||
294 | */ |
||
295 | public function isBenign($config, $context) |
||
313 | } |
||
314 | } |
||
315 | |||
316 | // vim: et sw=4 sts=4 |
||
317 |