Total Complexity | 64 |
Total Lines | 420 |
Duplicated Lines | 0 % |
Changes | 6 | ||
Bugs | 0 | Features | 0 |
Complex classes like URLInfo_Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common way to find such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use URLInfo_Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
24 | class URLInfo_Parser |
||
25 | { |
||
26 | /** |
||
27 | * @var string |
||
28 | */ |
||
29 | protected $url; |
||
30 | |||
31 | /** |
||
32 | * @var bool |
||
33 | */ |
||
34 | protected $isValid = false; |
||
35 | |||
36 | /** |
||
37 | * @var array |
||
38 | */ |
||
39 | protected $info; |
||
40 | |||
41 | /** |
||
42 | * @var array|NULL |
||
43 | */ |
||
44 | protected $error; |
||
45 | |||
46 | /** |
||
47 | * @var string[] |
||
48 | */ |
||
49 | protected $knownSchemes = array( |
||
50 | 'ftp', |
||
51 | 'http', |
||
52 | 'https', |
||
53 | 'mailto', |
||
54 | 'tel', |
||
55 | 'data', |
||
56 | 'file', |
||
57 | 'git' |
||
58 | ); |
||
59 | |||
60 | /** |
||
61 | * Stores a list of all unicode characters in the URL |
||
62 | * that have been filtered out before parsing it with |
||
63 | * parse_url. |
||
64 | * |
||
65 | * @var array<string,string> Encoded character => original character pairs. |
||
66 | */ |
||
67 | protected $unicodeChars = array(); |
||
68 | |||
69 | /** |
||
70 | * @var bool |
||
71 | */ |
||
72 | protected $encodeUTF = false; |
||
73 | |||
/**
 * Stores the URL and the encoding preference, parses the URL,
 * and then runs the special link type detection. The generic
 * validation is only run when no special type was detected.
 *
 * @param string $url The target URL.
 * @param bool $encodeUTF Whether to URL encode any plain text unicode characters.
 */
public function __construct(string $url, bool $encodeUTF)
{
    $this->encodeUTF = $encodeUTF;
    $this->url = $url;

    $this->parse();

    // A detected special type has already been flagged
    // as valid by detectType(): nothing left to check.
    if($this->detectType())
    {
        return;
    }

    $this->validate();
}
||
90 | |||
/**
 * Returns the URL information array: the result of PHP's
 * parse_url, after the parser's own filtering and adjustments.
 *
 * @return array
 */
public function getInfo() : array
{
    return $this->info;
}
||
101 | |||
/**
 * Parses the URL into its components and stores them in $info.
 *
 * Plain unicode characters are URL encoded first (see
 * filterUnicodeChars()), the parse_url result is then passed
 * through filterParsed(), and the original unicode characters
 * are restored afterwards if they should not stay encoded.
 */
protected function parse()
{
    $this->filterUnicodeChars();

    $parsed = parse_url($this->url);

    // parse_url returns false for seriously malformed URLs. Fall
    // back to an empty array in that case, so that $info always
    // matches its documented array type and getInfo() can honor
    // its array return type.
    $this->info = is_array($parsed) ? $parsed : array();

    $this->filterParsed();

    // If the URL contained any unicode characters, and we
    // do not want them URL encoded, restore them.
    if(!$this->encodeUTF && !empty($this->unicodeChars))
    {
        $this->info = $this->restoreUnicodeChars($this->info);
    }
}
||
117 | |||
/**
 * Walks through the URL character by character, and URL
 * encodes every letter whose encoded form differs from the
 * character itself. Each replacement is remembered in
 * $unicodeChars (encoded => original), and the URL is rebuilt
 * from the result before it is passed to parse_url.
 */
protected function filterUnicodeChars() : void
{
    $filtered = '';

    foreach(ConvertHelper::string2array($this->url) as $char)
    {
        // Only letters are candidates for encoding.
        if(!preg_match('/\p{L}/usix', $char))
        {
            $filtered .= $char;
            continue;
        }

        $encoded = rawurlencode($char);

        // Encoding did not change anything: keep the character as is.
        if($encoded == $char)
        {
            $filtered .= $char;
            continue;
        }

        $this->unicodeChars[$encoded] = $char;
        $filtered .= $encoded;
    }

    $this->url = $filtered;
}
||
147 | |||
/**
 * Runs the special link type checks in a fixed order: email,
 * fragment link, phone link, IP address. The first check that
 * returns true marks the URL as valid and ends the detection.
 *
 * @return bool Whether one of the special types was detected.
 */
protected function detectType() : bool
{
    $suffixes = array('email', 'fragmentLink', 'phoneLink', 'ipAddress');

    foreach($suffixes as $suffix)
    {
        // Each check lives in a method named detectType_{suffix}.
        if($this->{'detectType_'.$suffix}() !== true)
        {
            continue;
        }

        $this->isValid = true;
        return true;
    }

    return false;
}
||
170 | |||
171 | protected function validate() |
||
189 | } |
||
190 | |||
/**
 * Checks that the parsed URL information contains a host,
 * and registers the missing host error if it does not.
 *
 * @return bool
 */
protected function validate_hostIsPresent() : bool
{
    // Every link needs a host. It can be missing for example
    // if the link starts with a typo with only one slash, like:
    // "http:/hostname"
    if(!isset($this->info['host']))
    {
        $this->setError(
            URLInfo::ERROR_MISSING_HOST,
            t('Cannot determine the link\'s host name.') . ' ' .
            t('This usually happens when there\'s a typo somewhere.')
        );

        return false;
    }

    return true;
}
||
208 | |||
/**
 * Checks that the parsed URL information contains a scheme,
 * and registers the missing scheme error if it does not.
 *
 * @return bool
 */
protected function validate_schemeIsSet() : bool
{
    // No scheme could be found in the parsed URL information.
    if(!isset($this->info['scheme']))
    {
        $this->setError(
            URLInfo::ERROR_MISSING_SCHEME,
            t('Cannot determine the link\'s scheme, e.g. %1$s.', 'http')
        );

        return false;
    }

    return true;
}
||
225 | |||
/**
 * Checks that the URL's scheme is one of the entries in
 * $knownSchemes, and registers the invalid scheme error
 * (listing the valid schemes) if it is not.
 *
 * @return bool
 */
protected function validate_schemeIsKnown() : bool
{
    if(!in_array($this->info['scheme'], $this->knownSchemes))
    {
        $this->setError(
            URLInfo::ERROR_INVALID_SCHEME,
            t('The scheme %1$s is not supported for links.', $this->info['scheme']) . ' ' .
            t('Valid schemes are: %1$s.', implode(', ', $this->knownSchemes))
        );

        return false;
    }

    return true;
}
||
240 | |||
241 | /** |
||
242 | * Goes through all information in the parse_url result |
||
243 | * array, and attempts to fix any user errors in formatting |
||
244 | * that can be recovered from, mostly regarding stray spaces. |
||
245 | */ |
||
246 | protected function filterParsed() : void |
||
284 | } |
||
285 | } |
||
286 | |||
287 | /** |
||
288 | * Recursively goes through the array, and converts all previously |
||
289 | * URL encoded characters with their unicode character counterparts. |
||
290 | * |
||
291 | * @param array $subject |
||
292 | * @return array |
||
293 | */ |
||
294 | protected function restoreUnicodeChars(array $subject) : array |
||
315 | } |
||
316 | |||
/**
 * Puts the original unicode characters back into the string,
 * replacing the URL encoded versions that were stored in
 * $unicodeChars.
 *
 * @param string $string
 * @return string
 */
protected function restoreUnicodeChar(string $string) : string
{
    // Without a percent sign, there is nothing encoded to restore.
    if(!strstr($string, '%'))
    {
        return $string;
    }

    $encoded = array_keys($this->unicodeChars);
    $originals = array_values($this->unicodeChars);

    return str_replace($encoded, $originals, $string);
}
||
333 | |||
/**
 * Detects email links: either the scheme is "mailto", or the
 * path matches the email regex. In the second case, the scheme
 * is set to "mailto". On success, the type is set to email.
 *
 * @return bool
 */
protected function detectType_email() : bool
{
    $hasMailto = isset($this->info['scheme']) && $this->info['scheme'] == 'mailto';

    if(!$hasMailto)
    {
        $isAddress = isset($this->info['path']) && preg_match(RegexHelper::REGEX_EMAIL, $this->info['path']);

        if(!$isAddress)
        {
            return false;
        }

        // Plain email address: add the scheme it was written without.
        $this->info['scheme'] = 'mailto';
    }

    $this->info['type'] = URLInfo::TYPE_EMAIL;

    return true;
}
||
350 | |||
/**
 * Detects IPv4 address links. An address that parse_url stored
 * as the path (no host, no scheme) is moved to the host, with
 * "https" as scheme. If the isHostOnly() check then passes and
 * the host matches the IPv4 regex, the address is stored in
 * the "ip" key.
 *
 * @return bool
 */
protected function detectType_ipAddress() : bool
{
    if($this->isPathOnly() && preg_match(RegexHelper::REGEX_IPV4, $this->info['path']))
    {
        $address = $this->info['path'];

        unset($this->info['path']);

        $this->info['host'] = $address;
        $this->info['scheme'] = 'https';
    }

    if(!$this->isHostOnly() || !preg_match(RegexHelper::REGEX_IPV4, $this->info['host']))
    {
        return false;
    }

    $this->info['ip'] = $this->info['host'];

    return true;
}
||
366 | |||
/**
 * Whether the parsed URL information has a path, but neither
 * a host nor a scheme.
 *
 * @return bool
 */
private function isPathOnly() : bool
{
    if(isset($this->info['host']) || isset($this->info['scheme']))
    {
        return false;
    }

    return isset($this->info['path']);
}
||
371 | |||
372 | private function isHostOnly() : bool |
||
375 | } |
||
376 | |||
/**
 * Detects fragment links: a fragment is present, but no
 * scheme. On success, the type is set to fragment.
 *
 * @return bool
 */
protected function detectType_fragmentLink() : bool
{
    if(!isset($this->info['fragment']) || isset($this->info['scheme']))
    {
        return false;
    }

    $this->info['type'] = URLInfo::TYPE_FRAGMENT;

    return true;
}
||
386 | |||
/**
 * Detects phone links, which use the "tel" scheme.
 * On success, the type is set to phone.
 *
 * @return bool
 */
protected function detectType_phoneLink() : bool
{
    if(!isset($this->info['scheme']) || $this->info['scheme'] != 'tel')
    {
        return false;
    }

    $this->info['type'] = URLInfo::TYPE_PHONE;

    return true;
}
||
396 | |||
/**
 * Registers a validation error, and flags the URL as invalid.
 *
 * @param int $code The error code.
 * @param string $message The human readable error message.
 */
protected function setError(int $code, string $message)
{
    $this->error = array('code' => $code, 'message' => $message);
    $this->isValid = false;
}
||
406 | |||
/**
 * Whether the parsed URL was found to be valid: true once a
 * special link type has been detected, false once an error
 * has been registered.
 *
 * @return bool
 */
public function isValid() : bool
{
    return $this->isValid;
}
||
415 | |||
/**
 * Retrieves the message of the registered validation error.
 *
 * @return string The error message, or an empty string if no error has been registered.
 */
public function getErrorMessage() : string
{
    return isset($this->error) ? $this->error['message'] : '';
}
||
430 | |||
431 | /** |
||
432 | * If the validation failed, retrieves the validation |
||
433 | * error code. |
||
434 | * |
||
435 | * @return int |
||
436 | */ |
||
437 | public function getErrorCode() : int |
||
444 | } |
||
445 | } |
||
446 |