Passed
Push — master ( b083ab...a3db06 )
by Sebastian
03:30
created

URIParser::filterParsed()   C

Complexity

Conditions 11
Paths 288

Size

Total Lines 50
Code Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 27
c 1
b 0
f 0
dl 0
loc 50
rs 5.3833
cc 11
nc 288
nop 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

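Commonly applied refactorings include Extract Method: move a cohesive group of statements into a new, well-named method and call it from the original method.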

1
<?php
2
/**
3
 * @package Application Utils
4
 * @subpackage URLInfo
5
 * @see \AppUtils\URLInfo\URIParser
6
 */
7
8
declare(strict_types=1);
9
10
namespace AppUtils\URLInfo;
11
12
use AppUtils\ClassHelper;
13
use AppUtils\ConvertHelper;
14
use AppUtils\URLInfo;
15
use AppUtils\URLInfo\Parser\BaseURLTypeDetector;
16
use AppUtils\URLInfo\Parser\BaseURLValidator;
17
use AppUtils\URLInfo\Parser\URLTypeDetector\DetectEmail;
18
use AppUtils\URLInfo\Parser\URLTypeDetector\DetectFragmentLink;
19
use AppUtils\URLInfo\Parser\URLTypeDetector\DetectIPAddress;
20
use AppUtils\URLInfo\Parser\URLTypeDetector\DetectPhoneLink;
21
use AppUtils\URLInfo\Parser\URLTypeDetector\DetectStandardURL;
22
use AppUtils\URLInfo\Parser\URLValidator\ValidateHostIsPresent;
23
use AppUtils\URLInfo\Parser\URLValidator\ValidateIsTypeKnown;
24
use AppUtils\URLInfo\Parser\URLValidator\ValidateSchemeIsKnown;
25
use AppUtils\URLInfo\Parser\URLValidator\ValidateSchemeIsSet;
26
27
/**
28
 * Handles the URL parsing, as replacement for PHP's 
29
 * native parse_url function. It overcomes a number of
30
 * limitations of the function, using pre- and post-processing
31
 * of the URL and its component parts.
32
 *
33
 * @package Application Utils
34
 * @subpackage URLInfo
35
 * @author Sebastian Mordziol <[email protected]>
36
 */
37
class URIParser
{
    use URLInfoTrait;

    /**
     * The URL being parsed. Any plain unicode letters it contained
     * are replaced by their URL encoded form before parsing.
     */
    protected string $url;

    /**
     * Whether the URL passed type detection and validation.
     */
    protected bool $isValid = false;

    /**
     * Whether unicode characters stay URL encoded in the parsed result.
     */
    protected bool $encodeUTF = false;

    /**
     * Details on the last validation error, if any.
     *
     * @var array{code:int,message:string}|NULL
     */
    protected ?array $error = null;

    /**
     * Stores a list of all unicode characters in the URL
     * that have been filtered out before parsing it with
     * parse_url, keyed by their URL encoded form.
     *
     * @var array<string,string>
     */
    protected array $unicodeChars = array();

    /**
     * Type detector classes, tried in this order: the first
     * one that reports a match ends the detection.
     *
     * @var class-string[]
     */
    private static array $detectorClasses = array(
        DetectEmail::class,
        DetectFragmentLink::class,
        DetectPhoneLink::class,
        DetectIPAddress::class,
        DetectStandardURL::class
    );

    /**
     * Validator classes, run in this order: the first one
     * that fails ends the validation.
     *
     * @var class-string[]
     */
    private static array $validatorClasses = array(
        ValidateIsTypeKnown::class,
        ValidateSchemeIsSet::class,
        ValidateSchemeIsKnown::class,
        ValidateHostIsPresent::class
    );

    /**
     * Parses the URL, detects its type and validates it right away.
     *
     * @param string $url The target URL.
     * @param bool $encodeUTF Whether to URL encode any plain text unicode characters.
     */
    public function __construct(string $url, bool $encodeUTF)
    {
        $this->url = $url;
        $this->encodeUTF = $encodeUTF;

        $this->parse();
        $this->detectType();
        $this->validate();
    }

    /**
     * Retrieves the array as parsed by PHP's parse_url,
     * filtered and adjusted as necessary.
     *
     * @return array<int|string,mixed>
     */
    public function getInfo() : array
    {
        return $this->info;
    }

    /**
     * Runs the parsing pipeline: encodes unicode letters, parses
     * the URL with parse_url, filters the result, and restores the
     * unicode characters if they must not stay URL encoded.
     */
    protected function parse() : void
    {
        $this->filterUnicodeChars();

        $parsed = parse_url($this->url);

        // parse_url returns false for seriously malformed URLs.
        // Fall back to an empty array, so the filtering below
        // never writes array offsets into a boolean.
        $this->info = is_array($parsed) ? $parsed : array();

        $this->filterParsed();

        // if the URL contains any unicode characters, and we
        // do not want them URL encoded, restore them.
        if(!$this->encodeUTF && !empty($this->unicodeChars))
        {
            $this->info = $this->restoreUnicodeChars($this->info);
        }
    }

    /**
     * Finds any non-url encoded unicode characters in
     * the URL, and encodes them before the URL is
     * passed to parse_url.
     *
     * Every replaced character is remembered in the
     * unicodeChars property, keyed by its encoded form,
     * so it can be restored after parsing.
     */
    protected function filterUnicodeChars() : void
    {
        $chars = ConvertHelper::string2array($this->url);

        $keep = array();

        foreach($chars as $char)
        {
            // Only letters are candidates for encoding.
            if(preg_match('/\p{L}/uix', $char))
            {
                $encoded = rawurlencode($char);

                // Letters that rawurlencode leaves untouched
                // do not need to be tracked.
                if($encoded !== $char)
                {
                    $this->unicodeChars[$encoded] = $char;
                    $char = $encoded;
                }
            }

            $keep[] = $char;
        }

        $this->url = implode('', $keep);
    }

    /**
     * Runs the type detectors in order until one of them
     * recognizes the URL. The parsed info is replaced by the
     * info of each detector that was run, whether it matched
     * or not.
     *
     * @return bool Whether a detector recognized the URL.
     */
    protected function detectType() : bool
    {
        foreach(self::$detectorClasses as $className)
        {
            $detector = ClassHelper::requireObjectInstanceOf(
                BaseURLTypeDetector::class,
                new $className($this)
            );

            $detected = $detector->detect();

            // Use the adjusted data
            $this->info = $detector->getInfo();

            if($detected) {
                $this->isValid = true;
                return true;
            }
        }

        return false;
    }

    /**
     * Runs the validators in order, and stops at the first
     * one that does not return true. The URL is only marked
     * as valid if all validators pass. The parsed info is
     * replaced by the info of each validator that was run.
     */
    protected function validate() : void
    {
        foreach(self::$validatorClasses as $validatorClass)
        {
            $validator = ClassHelper::requireObjectInstanceOf(
                BaseURLValidator::class,
                new $validatorClass($this)
            );

            $result = $validator->validate();

            $this->info = $validator->getInfo();

            if($result !== true) {
                $this->isValid = false;
                return;
            }
        }

        $this->isValid = true;
    }

    /**
     * Goes through all information in the parse_url result
     * array, and attempts to fix any user errors in formatting
     * that can be recovered from, mostly regarding stray spaces.
     *
     * The order of the steps matters: the localhost detection
     * relies on the path having been cleaned up beforehand.
     */
    protected function filterParsed() : void
    {
        $this->info['params'] = array();
        $this->info['type'] = URLInfo::TYPE_NONE;

        $this->filterParsedScheme();
        $this->filterParsedCredentials();
        $this->filterParsedHost();
        $this->filterParsedPath();
        $this->filterParsedLocalhost();
        $this->filterParsedQuery();
    }

    /**
     * Lowercases an existing scheme, or tries to detect
     * one from the URL string when none was parsed.
     */
    private function filterParsedScheme() : void
    {
        if($this->hasScheme())
        {
            // The cast guards against a null scheme, which
            // strtolower rejects in strict types mode.
            $this->setScheme(strtolower((string)$this->getScheme()));
            return;
        }

        $scheme = URISchemes::detectScheme($this->url);

        if(!empty($scheme)) {
            $this->setScheme(URISchemes::resolveSchemeName($scheme));
        }
    }

    /**
     * URL decodes the username and password, if present.
     */
    private function filterParsedCredentials() : void
    {
        if(isset($this->info['user'])) {
            $this->info['user'] = urldecode($this->info['user']);
        }

        if(isset($this->info['pass'])) {
            $this->info['pass'] = urldecode($this->info['pass']);
        }
    }

    /**
     * Lowercases the host, and strips spaces from it.
     */
    private function filterParsedHost() : void
    {
        if(!$this->hasHost()) {
            return;
        }

        $host = strtolower($this->getHost());

        $this->setHost(str_replace(' ', '', $host));
    }

    /**
     * Strips spaces from the path.
     */
    private function filterParsedPath() : void
    {
        if($this->hasPath()) {
            $this->setPath(str_replace(' ', '', $this->getPath()));
        }
    }

    /**
     * Handles a bare "localhost" string, which ends up in the
     * path: moves it to the host, and sets the scheme to HTTPS
     * if none is set.
     */
    private function filterParsedLocalhost() : void
    {
        if($this->getPath() !== 'localhost') {
            return;
        }

        $this->setHost('localhost');
        $this->removePath();

        if(!$this->hasScheme()) {
            $this->setSchemeHTTPS();
        }
    }

    /**
     * Parses a non-empty query string into the "params"
     * entry, sorted by parameter name.
     */
    private function filterParsedQuery() : void
    {
        if(empty($this->info['query'])) {
            return;
        }

        $this->info['params'] = ConvertHelper::parseQueryString($this->info['query']);
        ksort($this->info['params']);
    }

    /**
     * Recursively goes through the array, and converts all previously
     * URL encoded characters with their unicode character counterparts.
     *
     * Only string keys and values are converted: other values, like
     * the integer port returned by parse_url or numeric array keys,
     * are kept as they are.
     *
     * @param array<int|string,mixed> $subject
     * @return array<int|string,mixed>
     */
    protected function restoreUnicodeChars(array $subject) : array
    {
        $result = array();

        foreach($subject as $key => $val)
        {
            if(is_array($val))
            {
                $val = $this->restoreUnicodeChars($val);
            }
            else if(is_string($val))
            {
                $val = $this->restoreUnicodeChar($val);
            }

            if(is_string($key))
            {
                $key = $this->restoreUnicodeChar($key);
            }

            $result[$key] = $val;
        }

        return $result;
    }

    /**
     * Replaces all URL encoded unicode characters
     * in the string with the unicode character.
     *
     * Only the encoded sequences recorded while filtering
     * the URL are replaced.
     *
     * @param string $string
     * @return string
     */
    protected function restoreUnicodeChar(string $string) : string
    {
        // Without a percent sign, there is nothing to restore.
        if(strpos($string, '%') === false)
        {
            return $string;
        }

        return str_replace(
            array_keys($this->unicodeChars),
            array_values($this->unicodeChars),
            $string
        );
    }

    /**
     * Marks the URL as invalid, and stores the error details.
     *
     * @param int $code
     * @param string $message
     */
    public function setError(int $code, string $message) : void
    {
        $this->isValid = false;

        $this->error = array(
            'code' => $code,
            'message' => $message
        );
    }

    /**
     * Checks whether the URL that was parsed is valid.
     * @return bool
     */
    public function isValid() : bool
    {
        return $this->isValid;
    }

    /**
     * If the validation failed, retrieves the validation
     * error message.
     *
     * @return string The message, or an empty string if no error was set.
     */
    public function getErrorMessage() : string
    {
        return $this->error['message'] ?? '';
    }

    /**
     * If the validation failed, retrieves the validation
     * error code.
     *
     * @return int The code, or -1 if no error was set.
     */
    public function getErrorCode() : int
    {
        return $this->error['code'] ?? -1;
    }
}
343