Passed
Push — master ( 61985e...8db7dc )
by Sebastian
04:44
created

URLInfo_Parser   B

Complexity

Total Complexity 51

Size/Duplication

Total Lines 379
Duplicated Lines 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
wmc 51
eloc 127
c 4
b 0
f 0
dl 0
loc 379
rs 7.92

19 Methods

Rating   Name   Duplication   Size   Complexity  
A validate() 0 18 3
B filterParsed() 0 25 7
A restoreUnicodeChars() 0 21 3
A detectType() 0 20 3
A filterUnicodeChars() 0 23 4
A getInfo() 0 3 1
A validate_schemeIsSet() 0 15 2
A validate_schemeIsKnown() 0 13 2
A getErrorCode() 0 7 2
A restoreUnicodeChar() 0 8 2
A getErrorMessage() 0 7 2
A detectType_fragmentLink() 0 8 3
A detectType_email() 0 15 5
A setError() 0 7 1
A isValid() 0 3 1
A __construct() 0 9 2
A parse() 0 13 3
A detectType_phoneLink() 0 8 3
A validate_hostIsPresent() 0 16 2

How to fix   Complexity   

Complex Class

Complex classes like URLInfo_Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use URLInfo_Parser, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * File containing the {@see AppUtils\URLInfo_Parser} class.
4
 *
5
 * @package Application Utils
6
 * @subpackage URLInfo
7
 * @see AppUtils\URLInfo_Parser
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
/**
15
 * Handles the URL parsing, as replacement for PHP's 
16
 * native parse_url function. It overcomes a number of
17
 * limitations of the function, using pre- and post-
18
 * processing of the URL and its component parts.
19
 *
20
 * @package Application Utils
21
 * @subpackage URLInfo
22
 * @author Sebastian Mordziol <[email protected]>
23
 */
24
class URLInfo_Parser
{
   /**
    * The URL being parsed. Once {@see URLInfo_Parser::filterUnicodeChars()}
    * has run, any plain unicode letters in it have been replaced by
    * their URL encoded form.
    *
    * @var string
    */
    protected $url;
    
   /**
    * Whether the URL passed type detection or validation.
    *
    * @var bool
    */
    protected $isValid = false;
    
   /**
    * The parse_url result, extended with the "params" and "type"
    * keys added by {@see URLInfo_Parser::filterParsed()}.
    *
    * @var array
    */
    protected $info;
    
   /**
    * The validation error, if any, as an array with the
    * keys "code" (int) and "message" (string).
    *
    * @var array|NULL
    */
    protected $error;
    
    /**
     * The schemes accepted by {@see URLInfo_Parser::validate_schemeIsKnown()}.
     *
     * @var string[]
     */
    protected $knownSchemes = array(
        'ftp',
        'http',
        'https',
        'mailto',
        'tel',
        'data',
        'file'
    );
    
   /**
    * Stores a list of all unicode characters in the URL
    * that have been filtered out before parsing it with
    * parse_url, indexed by their URL encoded form.
    * 
    * @var array<string,string>
    */
    protected $unicodeChars = array();
    
   /**
    * Whether unicode characters stay URL encoded in the
    * parsed information.
    *
    * @var bool
    */
    protected $encodeUTF = false;
    
   /**
    * Parses the URL right away, then tries to detect a special
    * link type (email, fragment, phone). Only if none matches
    * are the regular URL validations run.
    * 
    * @param string $url The target URL.
    * @param bool $encodeUTF Whether to URL encode any plain text unicode characters.
    */
    public function __construct(string $url, bool $encodeUTF)
    {
        $this->url = $url;
        $this->encodeUTF = $encodeUTF;
        
        $this->parse();
        
        if(!$this->detectType()) {
            $this->validate();
        }
    }

   /**
    * Retrieves the array as parsed by PHP's parse_url,
    * filtered and adjusted as necessary.
    * 
    * @return array
    */
    public function getInfo() : array
    {
        return $this->info;
    }
    
   /**
    * Runs the whole parsing chain: encodes unicode letters,
    * calls parse_url, filters the result, and restores the
    * unicode letters if they are not meant to stay encoded.
    */
    protected function parse()
    {
        $this->filterUnicodeChars();
        
        $parsed = parse_url($this->url);
        
        // parse_url returns false for seriously malformed URLs.
        // Fall back to an empty array, so the info property always
        // honors its array contract: the validation will then fail
        // on the missing scheme.
        $this->info = is_array($parsed) ? $parsed : array();
        
        $this->filterParsed();
        
        // if the URL contains any unicode characters, and we
        // do not want them URL encoded, restore them.
        if(!$this->encodeUTF && !empty($this->unicodeChars))
        {
            $this->info = $this->restoreUnicodeChars($this->info);
        }
    }
    
   /**
    * Finds any non-url encoded unicode characters in 
    * the URL, and encodes them before the URL is 
    * passed to parse_url.
    *
    * Every replaced character is remembered in the
    * unicodeChars property, so it can be restored later.
    */
    protected function filterUnicodeChars() : void
    {
        // NOTE(review): string2array presumably splits the string
        // into its individual (multibyte) characters - confirm.
        $chars = \AppUtils\ConvertHelper::string2array($this->url);
        
        $keep = array();
        
        foreach($chars as $char)
        {
            // The pattern matches any letter, ASCII ones included:
            // those are left untouched by rawurlencode, which is
            // why the encoded result is compared to the original.
            if(preg_match('/\p{L}/usix', $char))
            {
                $encoded = rawurlencode($char);
                
                if($encoded !== $char)
                {
                    $this->unicodeChars[$encoded] = $char;
                    $char = $encoded;
                }
            }
            
            $keep[] = $char;
        }
        
        $this->url = implode('', $keep);
    }
    
   /**
    * Calls the detectType_* methods in turn. The first one
    * that recognizes the link marks the URL as valid.
    *
    * @return bool Whether a special link type was detected.
    */
    protected function detectType() : bool
    {
        $types = array(
            'email',
            'fragmentLink',
            'phoneLink'
        );
        
        foreach($types as $type)
        {
            $method = 'detectType_'.$type;
            
            if($this->$method() === true) 
            {
                $this->isValid = true;
                return true;
            }
        }
        
        return false;
    }
    
   /**
    * Calls the validate_* methods in turn, and stops at the
    * first one that fails (it registers the error itself).
    * The URL is only marked as valid if all of them pass.
    */
    protected function validate()
    {
        $validations = array(
            'schemeIsSet',
            'schemeIsKnown',
            'hostIsPresent'
        );
        
        foreach($validations as $validation) 
        {
            $method = 'validate_'.$validation;
            
            if($this->$method() !== true) {
                return;
            }
        }
        
        $this->isValid = true;
    }
    
   /**
    * Checks that a host has been found in the URL, and
    * registers an error otherwise.
    *
    * @return bool
    */
    protected function validate_hostIsPresent() : bool
    {
        // every link needs a host. This case can happen for ex, if
        // the link starts with a typo with only one slash, like:
        // "http:/hostname"
        if(isset($this->info['host'])) {
            return true;
        }
        
        $this->setError(
            URLInfo::ERROR_MISSING_HOST,
            t('Cannot determine the link\'s host name.') . ' ' .
            t('This usually happens when there\'s a typo somewhere.')
        );

        return false;
    }
    
   /**
    * Checks that a scheme has been found in the URL, and
    * registers an error otherwise.
    *
    * @return bool
    */
    protected function validate_schemeIsSet() : bool
    {
        if(isset($this->info['scheme'])) {
            return true;
        }
        
        // no scheme found: the special link types that can do
        // without a scheme (email address, fragment link) have
        // already been ruled out by the type detection, which
        // runs before the validation.
        $this->setError(
            URLInfo::ERROR_MISSING_SCHEME,
            t('Cannot determine the link\'s scheme, e.g. %1$s.', 'http')
        );
        
        return false;
    }
    
   /**
    * Checks that the scheme is one of the known schemes, and
    * registers an error otherwise. Expects the scheme to be
    * set: it runs after the "schemeIsSet" validation.
    *
    * @return bool
    */
    protected function validate_schemeIsKnown() : bool
    {
        if(in_array($this->info['scheme'], $this->knownSchemes, true)) {
            return true;
        }
        
        $this->setError(
            URLInfo::ERROR_INVALID_SCHEME,
            t('The scheme %1$s is not supported for links.', $this->info['scheme']) . ' ' .
            t('Valid schemes are: %1$s.', implode(', ', $this->knownSchemes))
        );

        return false;
    }

   /**
    * Goes through all information in the parse_url result
    * array, and attempts to fix any user errors in formatting
    * that can be recovered from, mostly regarding stray spaces.
    *
    * Also adds the "params" key (the parsed query string, sorted
    * by parameter name) and the "type" key, which defaults to
    * the regular URL type.
    */
    protected function filterParsed()
    {
        $this->info['params'] = array();
        $this->info['type'] = URLInfo::TYPE_URL;
        
        if(isset($this->info['user'])) {
            $this->info['user'] = urldecode($this->info['user']);
        }

        if(isset($this->info['pass'])) {
            $this->info['pass'] = urldecode($this->info['pass']);
        }
        
        if(isset($this->info['host'])) {
            $this->info['host'] = str_replace(' ', '', $this->info['host']);
        }
        
        if(isset($this->info['path'])) {
            $this->info['path'] = str_replace(' ', '', $this->info['path']);
        }
        
        if(!empty($this->info['query']))
        {
            $this->info['params'] = \AppUtils\ConvertHelper::parseQueryString($this->info['query']);
            ksort($this->info['params']);
        }
    }
    
   /**
    * Recursively goes through the array, and converts all previously
    * URL encoded characters with their unicode character counterparts.
    *
    * Only strings are converted: other values (like the integer port
    * returned by parse_url) and integer keys are passed through as is.
    * 
    * @param array $subject
    * @return array
    */
    protected function restoreUnicodeChars(array $subject) : array
    {
        $result = array();
        
        foreach($subject as $key => $val)
        {
            if(is_array($val))
            {
                $val = $this->restoreUnicodeChars($val);
            }
            else if(is_string($val))
            {
                $val = $this->restoreUnicodeChar($val);
            }
            
            // Array keys can be integers: with strict types enabled,
            // these may not be passed on as string parameter.
            if(is_string($key))
            {
                $key = $this->restoreUnicodeChar($key);
            }
            
            $result[$key] = $val;
        }
        
        return $result;
    }
    
   /**
    * Replaces all URL encoded unicode characters
    * in the string with the unicode character.
    *
    * Only the characters that were encoded by
    * {@see URLInfo_Parser::filterUnicodeChars()} are replaced.
    * 
    * @param string $string
    * @return string
    */
    protected function restoreUnicodeChar(string $string) : string
    {
        // without a percent sign, there cannot be
        // any encoded characters to replace.
        if(strpos($string, '%') !== false)
        {
            return str_replace(array_keys($this->unicodeChars), array_values($this->unicodeChars), $string);
        }
        
        return $string;
    }
    
   /**
    * Detects email links: either via the mailto scheme, or when
    * the path matches the email regex. In the latter case, the
    * mailto scheme is added to the parsed information.
    *
    * @return bool
    */
    protected function detectType_email() : bool
    {
        if(isset($this->info['scheme']) && $this->info['scheme'] === 'mailto') {
            $this->info['type'] = URLInfo::TYPE_EMAIL;
            return true;
        }
        
        if(isset($this->info['path']) && preg_match(\AppUtils\RegexHelper::REGEX_EMAIL, $this->info['path']))
        {
            $this->info['scheme'] = 'mailto';
            $this->info['type'] = URLInfo::TYPE_EMAIL;
            return true;
        }
        
        return false;
    }
    
   /**
    * Detects fragment links: a fragment is present,
    * but no scheme.
    *
    * @return bool
    */
    protected function detectType_fragmentLink() : bool
    {
        if(isset($this->info['fragment']) && !isset($this->info['scheme'])) {
            $this->info['type'] = URLInfo::TYPE_FRAGMENT;
            return true;
        }
        
        return false;
    }
    
   /**
    * Detects phone links, which use the tel scheme.
    *
    * @return bool
    */
    protected function detectType_phoneLink() : bool
    {
        if(isset($this->info['scheme']) && $this->info['scheme'] === 'tel') {
            $this->info['type'] = URLInfo::TYPE_PHONE;
            return true;
        }
        
        return false;
    }

   /**
    * Registers a validation error, and marks the URL as invalid.
    *
    * @param int $code
    * @param string $message
    */
    protected function setError(int $code, string $message)
    {
        $this->isValid = false;
        
        $this->error = array(
            'code' => $code,
            'message' => $message
        );
    }
   
   /**
    * Checks whether the URL that was parsed is valid.
    * @return bool
    */
    public function isValid() : bool
    {
        return $this->isValid;
    }

   /**
    * If the validation failed, retrieves the validation
    * error message.
    * 
    * @return string The message, or an empty string if there is no error.
    */
    public function getErrorMessage() : string
    {
        if(isset($this->error)) {
            return $this->error['message'];
        }
        
        return '';
    }
    
   /**
    * If the validation failed, retrieves the validation
    * error code.
    * 
    * @return int The error code, or -1 if there is no error.
    */
    public function getErrorCode() : int
    {
        if(isset($this->error)) {
            return $this->error['code'];
        }
        
        return -1;
    }
}
405