Total Complexity | 51 |
Total Lines | 379 |
Duplicated Lines | 0 % |
Changes | 4 | ||
Bugs | 0 | Features | 0 |
Complex classes like URLInfo_Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use URLInfo_Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
24 | class URLInfo_Parser |
||
25 | { |
||
26 | /** |
||
27 | * @var string |
||
28 | */ |
||
29 | protected $url; |
||
30 | |||
31 | /** |
||
32 | * @var bool |
||
33 | */ |
||
34 | protected $isValid = false; |
||
35 | |||
36 | /** |
||
37 | * @var array |
||
38 | */ |
||
39 | protected $info; |
||
40 | |||
41 | /** |
||
42 | * @var array|NULL |
||
43 | */ |
||
44 | protected $error; |
||
45 | |||
46 | /** |
||
47 | * @var string[] |
||
48 | */ |
||
49 | protected $knownSchemes = array( |
||
50 | 'ftp', |
||
51 | 'http', |
||
52 | 'https', |
||
53 | 'mailto', |
||
54 | 'tel', |
||
55 | 'data', |
||
56 | 'file' |
||
57 | ); |
||
58 | |||
59 | /** |
||
60 | * Stores a list of all unicode characters in the URL |
||
61 | * that have been filtered out before parsing it with |
||
62 | * parse_url. |
||
63 | * |
||
64 | * @var array<string,string> URL encoded character => original unicode character |
||
65 | */ |
||
66 | protected $unicodeChars = array(); |
||
67 | |||
68 | /** |
||
69 | * @var bool |
||
70 | */ |
||
71 | protected $encodeUTF = false; |
||
72 | |||
/**
 * Stores the URL and the encoding preference, parses the URL
 * immediately, and then tries to classify it. The generic
 * validation only runs when none of the type detectors
 * recognized the URL.
 *
 * @param string $url The target URL.
 * @param bool $encodeUTF Whether to URL encode any plain text unicode characters.
 */
public function __construct(string $url, bool $encodeUTF)
{
    $this->encodeUTF = $encodeUTF;
    $this->url = $url;

    $this->parse();

    // A recognized special type is flagged as valid by the
    // detection itself, so no further validation is needed.
    $typeDetected = $this->detectType();

    if($typeDetected) {
        return;
    }

    $this->validate();
}
||
89 | |||
/**
 * Returns the URL information array: the result of PHP's
 * parse_url, after the parser's own filtering and
 * adjustments have been applied to it.
 *
 * @return array
 */
public function getInfo() : array
{
    $info = $this->info;

    return $info;
}
||
100 | |||
/**
 * Parses the URL and stores the result in the info array.
 *
 * Unicode letters are URL encoded first, then the URL is
 * handed to parse_url and the result is passed through
 * filterParsed(). If unicode encoding was not requested,
 * the encoded characters are restored afterwards.
 */
protected function parse()
{
    $this->filterUnicodeChars();

    $parsed = parse_url($this->url);

    // parse_url returns false for seriously malformed URLs.
    // Normalize that to an empty array: the info property is
    // documented as an array, it is returned by getInfo() with
    // an array return type, and restoreUnicodeChars() only
    // accepts arrays.
    $this->info = is_array($parsed) ? $parsed : array();

    $this->filterParsed();

    // if the URL contains any unicode characters, and we
    // do not want them URL encoded, restore them.
    if(!$this->encodeUTF && !empty($this->unicodeChars))
    {
        $this->info = $this->restoreUnicodeChars($this->info);
    }
}
||
116 | |||
/**
 * Walks through the URL character by character and URL
 * encodes every letter whose encoded form differs from the
 * character itself, so the URL can be passed to parse_url.
 *
 * Each replacement is remembered in the unicodeChars
 * property (encoded form => original character).
 */
protected function filterUnicodeChars() : void
{
    $filtered = '';

    // NOTE(review): string2array presumably splits the string
    // into individual (multibyte) characters - confirm.
    foreach(\AppUtils\ConvertHelper::string2array($this->url) as $character)
    {
        $replacement = $character;

        if(preg_match('/\p{L}/usix', $character))
        {
            $candidate = rawurlencode($character);

            // Only characters that are actually changed by the
            // encoding need to be tracked and replaced.
            if($candidate != $character)
            {
                $this->unicodeChars[$candidate] = $character;
                $replacement = $candidate;
            }
        }

        $filtered .= $replacement;
    }

    $this->url = $filtered;
}
||
146 | |||
/**
 * Runs the type detection methods in a fixed order (email,
 * fragment link, phone link). The first one that reports a
 * match marks the URL as valid and ends the detection.
 *
 * @return bool Whether one of the detectors matched.
 */
protected function detectType() : bool
{
    foreach(array('email', 'fragmentLink', 'phoneLink') as $suffix)
    {
        // Detector methods follow the naming scheme detectType_{suffix}.
        $detector = 'detectType_'.$suffix;

        if($this->$detector() !== true) {
            continue;
        }

        $this->isValid = true;
        return true;
    }

    return false;
}
||
168 | |||
169 | protected function validate() |
||
187 | } |
||
188 | |||
/**
 * Checks that the parsed URL has a host. Registers the
 * missing host error if it does not.
 *
 * @return bool
 */
protected function validate_hostIsPresent() : bool
{
    // A missing host is typically caused by a typo, for example
    // a single slash after the scheme: "http:/hostname"
    if(!isset($this->info['host']))
    {
        $message =
            t('Cannot determine the link\'s host name.') . ' ' .
            t('This usually happens when there\'s a typo somewhere.');

        $this->setError(URLInfo::ERROR_MISSING_HOST, $message);

        return false;
    }

    return true;
}
||
206 | |||
/**
 * Checks that the parsed URL has a scheme. Registers the
 * missing scheme error if it does not.
 *
 * @return bool
 */
protected function validate_schemeIsSet() : bool
{
    if(!isset($this->info['scheme']))
    {
        // Without a scheme, this may be an email address that
        // is missing its mailto: prefix.
        $message = t('Cannot determine the link\'s scheme, e.g. %1$s.', 'http');

        $this->setError(URLInfo::ERROR_MISSING_SCHEME, $message);

        return false;
    }

    return true;
}
||
223 | |||
/**
 * Checks that the URL's scheme is one of the known schemes.
 * Registers the invalid scheme error if it is not.
 *
 * A missing scheme is treated like an empty, unknown scheme
 * instead of triggering an undefined index warning.
 *
 * @return bool
 */
protected function validate_schemeIsKnown() : bool
{
    $scheme = $this->info['scheme'] ?? '';

    // Strict comparison: schemes are plain strings, and loose
    // comparison would apply PHP's type juggling rules to them.
    if(in_array($scheme, $this->knownSchemes, true)) {
        return true;
    }

    $this->setError(
        URLInfo::ERROR_INVALID_SCHEME,
        t('The scheme %1$s is not supported for links.', $scheme) . ' ' .
        t('Valid schemes are: %1$s.', implode(', ', $this->knownSchemes))
    );

    return false;
}
||
238 | |||
239 | /** |
||
240 | * Goes through all information in the parse_url result |
||
241 | * array, and attempts to fix any user errors in formatting |
||
242 | * that can be recovered from, mostly regarding stray spaces. |
||
243 | */ |
||
244 | protected function filterParsed() |
||
269 | } |
||
270 | } |
||
271 | |||
272 | /** |
||
273 | * Recursively goes through the array, and converts all previously |
||
274 | * URL encoded characters with their unicode character counterparts. |
||
275 | * |
||
276 | * @param array $subject |
||
277 | * @return array |
||
278 | */ |
||
279 | protected function restoreUnicodeChars(array $subject) : array |
||
300 | } |
||
301 | |||
/**
 * Converts the URL encoded unicode characters that were
 * tracked while filtering the URL back into the original
 * characters within the given string.
 *
 * @param string $string
 * @return string
 */
protected function restoreUnicodeChar(string $string) : string
{
    // URL encoded characters always contain a percent sign:
    // without one, there is nothing to restore.
    if(strpos($string, '%') === false)
    {
        return $string;
    }

    $encoded = array_keys($this->unicodeChars);
    $originals = array_values($this->unicodeChars);

    return str_replace($encoded, $originals, $string);
}
||
318 | |||
/**
 * Detects email links: either the scheme is mailto, or the
 * path matches the email regex. In the second case the
 * mailto scheme is added to the info array.
 *
 * @return bool
 */
protected function detectType_email() : bool
{
    $hasMailtoScheme = isset($this->info['scheme']) && $this->info['scheme'] == 'mailto';

    if(!$hasMailtoScheme)
    {
        $pathIsEmail = isset($this->info['path']) && preg_match(\AppUtils\RegexHelper::REGEX_EMAIL, $this->info['path']);

        if(!$pathIsEmail) {
            return false;
        }

        // A bare email address: add the scheme it is missing.
        $this->info['scheme'] = 'mailto';
    }

    $this->info['type'] = URLInfo::TYPE_EMAIL;

    return true;
}
||
335 | |||
/**
 * Detects fragment links: a fragment is present, but
 * no scheme.
 *
 * @return bool
 */
protected function detectType_fragmentLink() : bool
{
    if(!isset($this->info['fragment']) || isset($this->info['scheme'])) {
        return false;
    }

    $this->info['type'] = URLInfo::TYPE_FRAGMENT;

    return true;
}
||
345 | |||
/**
 * Detects phone links, which use the tel scheme.
 *
 * @return bool
 */
protected function detectType_phoneLink() : bool
{
    if(!isset($this->info['scheme']) || $this->info['scheme'] != 'tel') {
        return false;
    }

    $this->info['type'] = URLInfo::TYPE_PHONE;

    return true;
}
||
355 | |||
/**
 * Stores the validation error details, and flags the
 * URL as invalid.
 *
 * @param int $code
 * @param string $message
 */
protected function setError(int $code, string $message)
{
    $details = array(
        'code' => $code,
        'message' => $message
    );

    $this->error = $details;
    $this->isValid = false;
}
||
365 | |||
/**
 * Whether the parsed URL is valid: true when a type detector
 * recognized it, false once a validation error has been set.
 *
 * @return bool
 */
public function isValid() : bool
{
    $valid = $this->isValid;

    return $valid;
}
||
374 | |||
/**
 * Retrieves the message of the validation error, or an
 * empty string if no error has been registered.
 *
 * @return string
 */
public function getErrorMessage() : string
{
    if(!isset($this->error)) {
        return '';
    }

    return $this->error['message'];
}
||
389 | |||
390 | /** |
||
391 | * If the validation failed, retrieves the validation |
||
392 | * error code. |
||
393 | * |
||
394 | * @return int |
||
395 | */ |
||
396 | public function getErrorCode() : int |
||
403 | } |
||
404 | } |
||
405 |