Passed
Push — master ( 707ddf...aa8b2f )
by Sebastian
04:56
created

URLInfo_Parser   F

Complexity

Total Complexity 64

Size/Duplication

Total Lines 420
Duplicated Lines 0 %

Importance

Changes 6
Bugs 0 Features 0
Metric Value
wmc 64
eloc 146
c 6
b 0
f 0
dl 0
loc 420
rs 3.28

22 Methods

Rating   Name   Duplication   Size   Complexity  
A validate() 0 18 3
B filterParsed() 0 38 9
A restoreUnicodeChars() 0 21 3
A detectType() 0 21 3
A filterUnicodeChars() 0 23 4
A getInfo() 0 3 1
A validate_schemeIsSet() 0 15 2
A isHostOnly() 0 3 3
A validate_schemeIsKnown() 0 13 2
A getErrorCode() 0 7 2
A detectType_ipAddress() 0 14 5
A restoreUnicodeChar() 0 8 2
A getErrorMessage() 0 7 2
A detectType_fragmentLink() 0 8 3
A isPathOnly() 0 3 3
A detectType_email() 0 15 5
A setError() 0 7 1
A isValid() 0 3 1
A __construct() 0 9 2
A parse() 0 13 3
A detectType_phoneLink() 0 8 3
A validate_hostIsPresent() 0 16 2

How to fix   Complexity   

Complex Class

Complex classes like URLInfo_Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use URLInfo_Parser, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * File containing the {@see AppUtils\URLInfo_Parser} class.
4
 *
5
 * @package Application Utils
6
 * @subpackage URLInfo
7
 * @see AppUtils\URLInfo_Parser
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
/**
15
 * Handles the URL parsing, as replacement for PHP's 
16
 * native parse_url function. It overcomes a number of
17
 * limitations of the function, using pre- and post-
18
 * processing of the URL and its component parts.
19
 *
20
 * @package Application Utils
21
 * @subpackage URLInfo
22
 * @author Sebastian Mordziol <[email protected]>
23
 */
24
class URLInfo_Parser
25
{
26
   /**
27
    * @var string
28
    */
29
    protected $url;
30
    
31
   /**
32
    * @var bool
33
    */
34
    protected $isValid = false;
35
    
36
   /**
37
    * @var array
38
    */
39
    protected $info;
40
    
41
   /**
42
    * @var array|NULL
43
    */
44
    protected $error;
45
    
46
    /**
47
     * @var string[]
48
     */
49
    protected $knownSchemes = array(
50
        'ftp',
51
        'http',
52
        'https',
53
        'mailto',
54
        'tel',
55
        'data',
56
        'file',
57
        'git'
58
    );
59
    
60
   /**
61
    * Stores a list of all unicode characters in the URL
62
    * that have been filtered out before parsing it with
63
    * parse_url.
64
    * 
65
    * @var array<string,string> URL encoded character => original character pairs.
66
    */
67
    protected $unicodeChars = array();
68
    
69
   /**
70
    * @var bool
71
    */
72
    protected $encodeUTF = false;
73
    
74
   /**
    * Parses the URL right away: the parsed information is
    * available afterwards via {@see URLInfo_Parser::getInfo()}.
    *
    * The scheme and host validations are only run if the URL
    * has not been recognized as one of the special link types.
    *
    * @param string $url The target URL.
    * @param bool $encodeUTF Whether to URL encode any plain text unicode characters.
    */
    public function __construct(string $url, bool $encodeUTF)
    {
        $this->encodeUTF = $encodeUTF;
        $this->url = $url;

        $this->parse();

        // Special link types are flagged as valid by the type
        // detection itself, and skip the regular validation.
        if($this->detectType())
        {
            return;
        }

        $this->validate();
    }
90
91
   /**
    * Gets the URL information array: the result of PHP's
    * parse_url, after the parser has filtered and adjusted
    * it where necessary.
    *
    * @return array
    */
    public function getInfo() : array
    {
        return $this->info;
    }
101
    
102
    /**
     * Parses the URL: encodes plain text unicode characters,
     * runs PHP's parse_url on the result, filters the parsed
     * information, and finally restores the unicode characters
     * if they are not meant to stay URL encoded.
     */
    protected function parse()
    {
        $this->filterUnicodeChars();

        $parsed = parse_url($this->url);

        // parse_url returns false for seriously malformed URLs:
        // fall back to an empty array, so the following steps
        // can always rely on working with an array.
        $this->info = is_array($parsed) ? $parsed : array();

        $this->filterParsed();

        // if the URL contains any unicode characters, and we
        // do not want them URL encoded, restore them.
        if(!$this->encodeUTF && !empty($this->unicodeChars))
        {
            $this->info = $this->restoreUnicodeChars($this->info);
        }
    }
117
118
   /**
    * Replaces the letters of the URL that are modified by
    * rawurlencode with their URL encoded variant, before
    * the URL is handed to parse_url.
    *
    * Every replaced character is remembered in the unicode
    * characters collection (encoded => original), so it can
    * be restored after the parsing.
    */
    protected function filterUnicodeChars() : void
    {
        $filtered = '';

        // NOTE(review): string2array presumably splits the URL into
        // its individual (multibyte) characters - confirm in ConvertHelper.
        foreach(ConvertHelper::string2array($this->url) as $char)
        {
            if(preg_match('/\p{L}/usix', $char))
            {
                $encoded = rawurlencode($char);

                // Plain ASCII letters stay untouched by the encoding,
                // and do not need to be tracked.
                if($encoded != $char)
                {
                    $this->unicodeChars[$encoded] = $char;
                    $filtered .= $encoded;
                    continue;
                }
            }

            $filtered .= $char;
        }

        $this->url = $filtered;
    }
147
    
148
    /**
     * Tries to recognize the URL as one of the special link
     * types, by calling the matching `detectType_xxx` methods
     * in a fixed order. The first positive detection flags the
     * URL as valid, and ends the search.
     *
     * @return bool Whether a special link type has been detected.
     */
    protected function detectType() : bool
    {
        $candidates = array(
            'email',
            'fragmentLink',
            'phoneLink',
            'ipAddress'
        );

        foreach($candidates as $candidate)
        {
            $detector = 'detectType_'.$candidate;

            if($this->$detector() !== true)
            {
                continue;
            }

            $this->isValid = true;
            return true;
        }

        return false;
    }
170
    
171
    /**
     * Runs the `validate_xxx` methods in a fixed order, and
     * stops at the first one that fails. The URL is only
     * flagged as valid if all of them pass.
     */
    protected function validate()
    {
        $checks = array(
            'schemeIsSet',
            'schemeIsKnown',
            'hostIsPresent'
        );

        $passed = true;

        foreach($checks as $check)
        {
            $validator = 'validate_'.$check;

            if($this->$validator() !== true)
            {
                // Later checks are skipped once one has failed.
                $passed = false;
                break;
            }
        }

        if($passed)
        {
            $this->isValid = true;
        }
    }
190
    
191
    /**
     * Validation: the parsed URL must have a host.
     *
     * A missing host can happen for example when the link
     * has a typo with a single slash after the scheme, like
     * "http:/hostname". Registers the matching error if no
     * host is present.
     *
     * @return bool
     */
    protected function validate_hostIsPresent() : bool
    {
        if(!isset($this->info['host']))
        {
            $message =
                t('Cannot determine the link\'s host name.') . ' ' .
                t('This usually happens when there\'s a typo somewhere.');

            $this->setError(URLInfo::ERROR_MISSING_HOST, $message);

            return false;
        }

        return true;
    }
208
    
209
    /**
     * Validation: the parsed URL must have a scheme.
     * Registers the matching error if it is missing.
     *
     * @return bool
     */
    protected function validate_schemeIsSet() : bool
    {
        if(!isset($this->info['scheme']))
        {
            // The constructor runs the type detection before this
            // validation, so links that can do without an explicit
            // scheme (like plain email addresses) never get here.
            $message = t('Cannot determine the link\'s scheme, e.g. %1$s.', 'http');

            $this->setError(URLInfo::ERROR_MISSING_SCHEME, $message);

            return false;
        }

        return true;
    }
225
    
226
    /**
     * Validation: the scheme of the URL must be one of the
     * known schemes. Registers the matching error otherwise.
     *
     * @return bool
     */
    protected function validate_schemeIsKnown() : bool
    {
        // Fall back to an empty scheme, so the method does not
        // depend on the scheme check having been run beforehand.
        $scheme = $this->info['scheme'] ?? '';

        // Strict lookup: schemes are always compared as strings.
        if(in_array($scheme, $this->knownSchemes, true)) {
            return true;
        }

        $this->setError(
            URLInfo::ERROR_INVALID_SCHEME,
            t('The scheme %1$s is not supported for links.', $scheme) . ' ' .
            t('Valid schemes are: %1$s.', implode(', ', $this->knownSchemes))
        );

        return false;
    }
240
241
   /**
    * Goes through all information in the parse_url result
    * array, and attempts to fix any user errors in formatting
    * that can be recovered from, mostly regarding stray spaces.
    *
    * Also adds the `params` key (the query parameters, sorted
    * by name) and the `type` key (regular URL by default) to
    * the information array.
    */
    protected function filterParsed() : void
    {
        $this->info['params'] = array();
        $this->info['type'] = URLInfo::TYPE_URL;

        if(isset($this->info['scheme']))
        {
            $this->info['scheme'] = strtolower($this->info['scheme']);
        }
        else
        {
            // parse_url did not find a scheme: try the custom detection.
            // NOTE(review): detectScheme presumably returns the scheme
            // including its separator (e.g. "mailto:") - confirm in
            // URLInfo_Schemes.
            $scheme = URLInfo_Schemes::detectScheme($this->url);

            if(!empty($scheme))
            {
                $colonPos = strpos($scheme, ':');

                // Without the guard, a missing colon would pass
                // false as length to substr, which triggers a
                // TypeError in strict mode.
                if($colonPos !== false)
                {
                    $scheme = substr($scheme, 0, $colonPos);
                }

                // Lowercased like the scheme found by parse_url.
                $this->info['scheme'] = strtolower($scheme);
            }
        }

        if(isset($this->info['user'])) {
            $this->info['user'] = urldecode($this->info['user']);
        }

        if(isset($this->info['pass'])) {
            $this->info['pass'] = urldecode($this->info['pass']);
        }

        if(isset($this->info['host'])) {
            $this->info['host'] = strtolower($this->info['host']);
            $this->info['host'] = str_replace(' ', '', $this->info['host']);
        }

        if(isset($this->info['path'])) {
            $this->info['path'] = str_replace(' ', '', $this->info['path']);
        }

        // empty() already covers the case of the key not being set.
        if(!empty($this->info['query']))
        {
            $this->info['params'] = ConvertHelper::parseQueryString($this->info['query']);
            ksort($this->info['params']);
        }
    }
286
    
287
   /**
    * Recursively goes through the array, and replaces all previously
    * URL encoded characters with their unicode character counterparts,
    * in both the keys and the values.
    *
    * Only strings are processed: other values (like the port number,
    * which parse_url returns as an integer) and integer keys are kept
    * as they are. Passing them on to the string-only replacement method
    * would trigger a TypeError in strict mode.
    *
    * @param array $subject
    * @return array
    */
    protected function restoreUnicodeChars(array $subject) : array
    {
        $result = array();

        foreach($subject as $key => $val)
        {
            if(is_array($val))
            {
                $val = $this->restoreUnicodeChars($val);
            }
            else if(is_string($val))
            {
                $val = $this->restoreUnicodeChar($val);
            }

            if(is_string($key))
            {
                $key = $this->restoreUnicodeChar($key);
            }

            $result[$key] = $val;
        }

        return $result;
    }
316
    
317
   /**
    * Swaps the URL encoded variants of the unicode characters
    * that were filtered out of the URL back to the characters
    * themselves. Strings without any percent sign are returned
    * unchanged.
    *
    * @param string $string
    * @return string
    */
    protected function restoreUnicodeChar(string $string) : string
    {
        // Without a percent sign, there is nothing to restore.
        if(strpos($string, '%') === false)
        {
            return $string;
        }

        $encoded = array_keys($this->unicodeChars);
        $originals = array_values($this->unicodeChars);

        return str_replace($encoded, $originals, $string);
    }
333
    
334
    /**
     * Detects email links: either via the `mailto` scheme,
     * or via a path that matches the email regex. In the
     * second case, the `mailto` scheme is added automatically.
     *
     * @return bool
     */
    protected function detectType_email() : bool
    {
        $hasMailto = isset($this->info['scheme']) && $this->info['scheme'] == 'mailto';

        if(!$hasMailto)
        {
            $pathIsEmail = isset($this->info['path']) && preg_match(RegexHelper::REGEX_EMAIL, $this->info['path']);

            if(!$pathIsEmail)
            {
                return false;
            }

            // Plain email address: add the missing scheme.
            $this->info['scheme'] = 'mailto';
        }

        $this->info['type'] = URLInfo::TYPE_EMAIL;

        return true;
    }
350
351
    /**
     * Detects URLs that consist of an IPv4 address only.
     *
     * An address given without scheme ends up in the path:
     * in this case it is moved to the host, and the `https`
     * scheme is added. On success, the address is stored in
     * the `ip` key of the information array.
     *
     * @return bool
     */
    protected function detectType_ipAddress() : bool
    {
        if($this->isPathOnly() && preg_match(RegexHelper::REGEX_IPV4, $this->info['path']))
        {
            $address = $this->info['path'];

            $this->info['host'] = $address;
            $this->info['scheme'] = 'https';

            unset($this->info['path']);
        }

        if(!$this->isHostOnly() || !preg_match(RegexHelper::REGEX_IPV4, $this->info['host']))
        {
            return false;
        }

        $this->info['ip'] = $this->info['host'];

        return true;
    }
366
367
    /**
     * Whether the parsed URL has a path, but neither
     * a host nor a scheme.
     *
     * @return bool
     */
    private function isPathOnly() : bool
    {
        if(!isset($this->info['path']))
        {
            return false;
        }

        return !(isset($this->info['host']) || isset($this->info['scheme']));
    }
371
372
    /**
     * Whether the parsed URL has a host, but neither
     * a path nor a query string.
     *
     * @return bool
     */
    private function isHostOnly() : bool
    {
        if(!isset($this->info['host']))
        {
            return false;
        }

        return !(isset($this->info['path']) || isset($this->info['query']));
    }
376
377
    /**
     * Detects fragment links: URLs that have a fragment,
     * but no scheme.
     *
     * @return bool
     */
    protected function detectType_fragmentLink() : bool
    {
        if(isset($this->info['scheme']) || !isset($this->info['fragment']))
        {
            return false;
        }

        $this->info['type'] = URLInfo::TYPE_FRAGMENT;

        return true;
    }
386
    
387
    /**
     * Detects phone links, which use the `tel` scheme.
     *
     * @return bool
     */
    protected function detectType_phoneLink() : bool
    {
        $isPhone = isset($this->info['scheme']) && $this->info['scheme'] == 'tel';

        if($isPhone)
        {
            $this->info['type'] = URLInfo::TYPE_PHONE;
        }

        return $isPhone;
    }
396
397
    /**
     * Registers a validation error, and flags the URL
     * as invalid.
     *
     * @param int $code
     * @param string $message
     */
    protected function setError(int $code, string $message)
    {
        $details = array();
        $details['code'] = $code;
        $details['message'] = $message;

        $this->error = $details;
        $this->isValid = false;
    }
406
   
407
   /**
    * Whether the parsed URL has been recognized as valid,
    * either by the type detection or by the validation.
    *
    * @return bool
    */
    public function isValid() : bool
    {
        return $this->isValid;
    }
415
416
   /**
    * Retrieves the message of the validation error, if
    * the validation failed.
    *
    * @return string The message, or an empty string if no error has been registered.
    */
    public function getErrorMessage() : string
    {
        if(!isset($this->error))
        {
            return '';
        }

        return $this->error['message'];
    }
430
    
431
   /**
    * Retrieves the code of the validation error, if
    * the validation failed.
    *
    * @return int The error code, or -1 if no error has been registered.
    */
    public function getErrorCode() : int
    {
        if(!isset($this->error))
        {
            return -1;
        }

        return $this->error['code'];
    }
445
}
446