Passed
Push — master ( 12a81e...e685ae )
by Sebastian
02:29
created

URLInfo::normalize()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 13
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 6
c 1
b 0
f 0
dl 0
loc 13
rs 10
cc 3
nc 3
nop 1
1
<?php
2
/**
3
 * File containing the {@see AppUtils\URLInfo} class.
4
 * 
5
 * @package Application Utils
6
 * @subpackage URLInfo
7
 * @see AppUtils\URLInfo
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
/**
15
 * Replacement for PHP's native `parse_url` function, which
16
 * handles some common pitfalls and issues that are hard to 
17
 * follow, as well as adding a number of utility methods.
18
 * 
19
 * @package Application Utils
20
 * @subpackage URLInfo
21
 * @author Sebastian Mordziol <[email protected]>
22
 */
23
class URLInfo implements \ArrayAccess
24
{
25
    const ERROR_MISSING_SCHEME = 42101;
26
    
27
    const ERROR_INVALID_SCHEME = 42102;
28
29
    const ERROR_MISSING_HOST = 42103;
30
    
31
    const ERROR_CANNOT_FIND_CSS_FOLDER = 42104;
32
    
33
    const ERROR_UNKNOWN_TYPE_FOR_LABEL = 42105;
34
    
35
    const ERROR_CURL_INIT_FAILED = 42106;
36
    
37
    const TYPE_EMAIL = 'email';
38
    const TYPE_FRAGMENT = 'fragment';
39
    const TYPE_PHONE = 'phone';
40
    const TYPE_URL = 'url';
41
    
42
   /**
43
    * The original URL that was passed to the constructor.
44
    * @var string
45
    */
46
    protected $rawURL;
47
48
   /**
49
    * @var array
50
    */
51
    protected $info;
52
    
53
   /**
54
    * @var string[]
55
    */
56
    protected $excludedParams = array();
57
    
58
   /**
59
    * @var bool
60
    * @see URLInfo::setParamExclusion()
61
    */
62
    protected $paramExclusion = false;
63
    
64
   /**
65
    * @var array
66
    * @see URLInfo::getTypeLabel()
67
    */
68
    protected static $typeLabels;
69
    
70
   /**
71
    * @var bool
72
    */
73
    protected $highlightExcluded = false;
74
    
75
   /**
76
    * @var array
77
    */
78
    protected $infoKeys = array(
79
        'scheme',
80
        'host',
81
        'port',
82
        'user',
83
        'pass',
84
        'path',
85
        'query',
86
        'fragment'
87
    );
88
    
89
   /**
90
    * @var string
91
    */
92
    protected $url;
93
    
94
   /**
95
    * @var URLInfo_Parser
96
    */
97
    protected $parser;
98
    
99
   /**
100
    * @var URLInfo_Normalizer
101
    */
102
    protected $normalizer;
103
    
104
    /**
     * Keeps the URL as given, runs it through the filter, and
     * parses the filtered result into the info array.
     *
     * @param string $url The URL to analyze.
     */
    public function __construct(string $url)
    {
        $filtered = self::filterURL($url);
        $parser = new URLInfo_Parser($filtered);

        $this->rawURL = $url;
        $this->url = $filtered;
        $this->parser = $parser;
        $this->info = $parser->getInfo();
    }
112
    
113
   /**
    * Cleans up an URL before it gets parsed. The actual work is
    * delegated to {@see URLInfo_Filter::filter()}, which is meant
    * to remove control characters and the like.
    *
    * @param string $url
    * @return string The filtered URL.
    */
    public static function filterURL(string $url)
    {
        $filtered = URLInfo_Filter::filter($url);

        return $filtered;
    }
124
    
125
    /**
     * Whether the URL uses the `https` scheme.
     *
     * @return boolean False if no scheme is present, or if it is not exactly `https`.
     */
    public function isSecure() : bool
    {
        $scheme = $this->info['scheme'] ?? null;

        return $scheme === 'https';
    }
133
    
134
    /**
     * Whether the detected type is a fragment.
     *
     * @return bool
     * @see URLInfo::TYPE_FRAGMENT
     */
    public function isAnchor() : bool
    {
        $type = $this->info['type'];

        return self::TYPE_FRAGMENT === $type;
    }
138
    
139
    /**
     * Whether the detected type is an email address.
     *
     * @return bool
     * @see URLInfo::TYPE_EMAIL
     */
    public function isEmail() : bool
    {
        $type = $this->info['type'];

        return self::TYPE_EMAIL === $type;
    }
143
    
144
    /**
     * Whether the detected type is a phone number.
     *
     * @return bool
     * @see URLInfo::TYPE_PHONE
     */
    public function isPhoneNumber() : bool
    {
        $type = $this->info['type'];

        return self::TYPE_PHONE === $type;
    }
148
    
149
   /**
    * Whether the URL is a regular URL, as opposed to one of the
    * other types like a phone number or email address.
    *
    * NOTE: This is decided solely by the presence of a host name.
    * A host that PHP considers empty (an empty string or `0`)
    * counts as not present.
    *
    * @return bool
    */
    public function isURL() : bool
    {
        return (bool)$this->getHost();
    }
160
    
161
    /**
     * Whether the parser considers the URL valid.
     *
     * @return bool
     */
    public function isValid() : bool
    {
        $valid = $this->parser->isValid();

        return $valid;
    }
165
    
166
   /**
    * Retrieves the host name from the parsed information.
    *
    * @return string The host, or an empty string if none is present.
    */
    public function getHost() : string
    {
        $host = $this->getInfoKey('host');

        return $host;
    }
175
    
176
   /**
    * Retrieves the path from the parsed information.
    *
    * @return string The path, or an empty string if none is present.
    */
    public function getPath() : string
    {
        $path = $this->getInfoKey('path');

        return $path;
    }
184
    
185
    /**
     * Retrieves the fragment from the parsed information.
     *
     * @return string The fragment, or an empty string if none is present.
     */
    public function getFragment() : string
    {
        $fragment = $this->getInfoKey('fragment');

        return $fragment;
    }
189
    
190
    /**
     * Retrieves the scheme from the parsed information.
     *
     * @return string The scheme, or an empty string if none is present.
     */
    public function getScheme() : string
    {
        $scheme = $this->getInfoKey('scheme');

        return $scheme;
    }
194
    
195
   /**
    * Retrieves the port specified in the URL, or -1 if none is present.
    *
    * NOTE: A port value that PHP considers empty (an empty string
    * or `0`) is reported as -1 as well.
    *
    * @return int
    */
    public function getPort() : int
    {
        $port = $this->getInfoKey('port');

        return empty($port) ? -1 : (int)$port;
    }
209
    
210
   /**
    * Retrieves the raw query string from the parsed information.
    *
    * @return string The query string, or an empty string if none is present.
    *
    * @see URLInfo::getParams()
    */
    public function getQuery() : string
    {
        $query = $this->getInfoKey('query');

        return $query;
    }
221
    
222
    /**
     * Retrieves the user name from the parsed information.
     *
     * @return string The user name, or an empty string if none is present.
     */
    public function getUsername() : string
    {
        $user = $this->getInfoKey('user');

        return $user;
    }
226
    
227
    /**
     * Retrieves the password from the parsed information.
     *
     * @return string The password, or an empty string if none is present.
     */
    public function getPassword() : string
    {
        $pass = $this->getInfoKey('pass');

        return $pass;
    }
231
    
232
   /**
    * Whether the URL contains a port number, i.e. whether
    * getPort() returns something other than -1.
    *
    * @return bool
    * @see URLInfo::getPort()
    */
    public function hasPort() : bool
    {
        $port = $this->getPort();

        return $port !== -1;
    }
240
    
241
   /**
    * Alias for the hasParams() method.
    *
    * @return bool
    * @see URLInfo::hasParams()
    */
    public function hasQuery() : bool
    {
        $hasParams = $this->hasParams();

        return $hasParams;
    }
250
    
251
    /**
     * Whether the URL has a non-empty host name.
     *
     * @return bool
     */
    public function hasHost() : bool
    {
        return '' !== $this->getHost();
    }
255
    
256
    /**
     * Whether the URL has a non-empty path.
     *
     * @return bool
     */
    public function hasPath() : bool
    {
        return '' !== $this->getPath();
    }
260
    
261
    /**
     * Whether the URL has a non-empty fragment.
     *
     * @return bool
     */
    public function hasFragment() : bool
    {
        return '' !== $this->getFragment();
    }
265
    
266
    /**
     * Whether the URL has a non-empty user name.
     *
     * @return bool
     */
    public function hasUsername() : bool
    {
        return '' !== $this->getUsername();
    }
270
    
271
    /**
     * Whether the URL has a non-empty password.
     *
     * @return bool
     */
    public function hasPassword() : bool
    {
        return '' !== $this->getPassword();
    }
275
    
276
    /**
     * Whether the URL has a non-empty scheme.
     *
     * @return bool
     */
    public function hasScheme() : bool
    {
        return '' !== $this->getScheme();
    }
280
    
281
    /**
     * Reads a single entry of the parsed information array and
     * converts it to a string.
     *
     * @param string $name Key in the info array, e.g. `host`.
     * @return string The value as string, or an empty string if the key is missing or null.
     */
    protected function getInfoKey(string $name) : string
    {
        return (string)($this->info[$name] ?? '');
    }
289
290
   /**
    * Retrieves the normalized URL, including the user name and
    * password if present. Normalization is meant to ensure that all
    * parameters in the URL are always in the same order.
    *
    * @return string Empty string if the URL is not valid.
    * @see URLInfo::getNormalizedWithoutAuth()
    */
    public function getNormalized() : string
    {
        $withAuth = true;

        return $this->normalize($withAuth);
    }
300
    
301
   /**
    * Like getNormalized(), but with authentication disabled in the
    * normalizer: if a username and password are present in the URL,
    * the returned URL is meant to omit them.
    *
    * @return string Empty string if the URL is not valid.
    * @see URLInfo::getNormalized()
    */
    public function getNormalizedWithoutAuth() : string
    {
        $withAuth = false;

        return $this->normalize($withAuth);
    }
311
    
312
    /**
     * Normalizes the URL using a lazily created normalizer instance.
     * The instance is reused between calls; the auth flag is applied
     * anew on every call.
     *
     * @param bool $auth Passed on to the normalizer's enableAuth() method.
     * @return string The normalized URL, or an empty string if the URL is not valid.
     */
    protected function normalize(bool $auth=true) : string
    {
        if($this->isValid() === false) {
            return '';
        }

        // Created on first use only.
        if($this->normalizer === null) {
            $this->normalizer = new URLInfo_Normalizer($this);
        }

        $normalizer = $this->normalizer;
        $normalizer->enableAuth($auth);

        return $normalizer->normalize();
    }
326
    
327
   /**
    * Creates a hash of the URL, which can be used for comparisons.
    * The hash is built from the normalized URL, so all URLs that
    * normalize to the same string share the same hash.
    *
    * @return string
    * @see URLInfo::getNormalized()
    */
    public function getHash()
    {
        $normalized = $this->getNormalized();

        return \AppUtils\ConvertHelper::string2shortHash($normalized);
    }
339
340
   /**
    * Highlights the URL using HTML tags with specific highlighting
    * class names. The rendering is done by a new URLInfo_Highlighter
    * instance on every call.
    *
    * @return string Will return an empty string if the URL is not valid.
    */
    public function getHighlighted() : string
    {
        if($this->isValid()) {
            return (new URLInfo_Highlighter($this))->highlight();
        }

        return '';
    }
356
    
357
    /**
     * Retrieves the error message reported by the parser.
     *
     * @return string
     */
    public function getErrorMessage() : string
    {
        $message = $this->parser->getErrorMessage();

        return $message;
    }
361
    
362
    /**
     * Retrieves the error code reported by the parser.
     *
     * @return int
     */
    public function getErrorCode() : int
    {
        $code = $this->parser->getErrorCode();

        return $code;
    }
366
    
367
    /**
     * Whether getParams() returns at least one parameter. Excluded
     * parameters are not counted while parameter exclusion is enabled.
     *
     * @return bool
     * @see URLInfo::getParams()
     */
    public function hasParams() : bool
    {
        return count($this->getParams()) > 0;
    }
372
    
373
    /**
     * Counts the parameters returned by getParams().
     *
     * @return int
     * @see URLInfo::getParams()
     */
    public function countParams() : int
    {
        return count($this->getParams());
    }
378
    
379
   /**
    * Retrieves all parameters specified in the url,
    * if any, as an associative array.
    *
    * NOTE: While parameter exclusion is enabled, parameters
    * on the excluded parameters list are left out.
    *
    * @return array
    * @see URLInfo::setParamExclusion()
    */
    public function getParams() : array
    {
        $filterActive = $this->paramExclusion && !empty($this->excludedParams);

        if(!$filterActive) {
            return $this->info['params'];
        }

        $result = array();

        foreach($this->info['params'] as $paramName => $paramValue)
        {
            if(isset($this->excludedParams[$paramName])) {
                continue;
            }

            $result[$paramName] = $paramValue;
        }

        return $result;
    }
404
    
405
   /**
    * Retrieves the names of all parameters returned by getParams().
    *
    * @return string[]
    * @see URLInfo::getParams()
    */
    public function getParamNames() : array
    {
        return array_keys($this->getParams());
    }
414
    
415
   /**
    * Retrieves a specific parameter value from the URL.
    *
    * NOTE: This reads the parsed parameters directly, so the
    * list of excluded parameters has no effect here.
    *
    * @param string $name
    * @return string The parameter value, or an empty string if it does not exist.
    */
    public function getParam(string $name) : string
    {
        return $this->info['params'][$name] ?? '';
    }
429
    
430
   /**
    * Adds a parameter to the list of excluded parameters and turns
    * parameter exclusion on. Methods that honor the exclusion list
    * then act as if the parameter was not present in the source URL.
    *
    * If the parameter is already on the list, nothing is changed:
    * the first reason is kept, and the exclusion mode is left as is.
    *
    * @param string $name
    * @param string $reason A human readable explanation why this is excluded - used when highlighting links.
    * @return URLInfo
    */
    public function excludeParam(string $name, string $reason) : URLInfo
    {
        if(isset($this->excludedParams[$name])) {
            return $this;
        }

        $this->excludedParams[$name] = $reason;
        $this->setParamExclusion();

        return $this;
    }
450
451
    /**
     * Retrieves the type identifier stored in the parsed information.
     *
     * @return string
     *
     * @see URLInfo::TYPE_EMAIL
     * @see URLInfo::TYPE_FRAGMENT
     * @see URLInfo::TYPE_PHONE
     * @see URLInfo::TYPE_URL
     */
    public function getType() : string
    {
        $type = $this->info['type'];

        return $type;
    }
465
    
466
    /**
     * Retrieves the translated, human readable label for the detected
     * type. The labels are built once and shared by all instances.
     *
     * @return string
     * @throws BaseException If no label exists for the detected type.
     *
     * @see URLInfo::ERROR_UNKNOWN_TYPE_FOR_LABEL
     */
    public function getTypeLabel() : string
    {
        if(self::$typeLabels === null)
        {
            $labels = array();
            $labels[self::TYPE_EMAIL] = t('Email');
            $labels[self::TYPE_FRAGMENT] = t('Jump mark');
            $labels[self::TYPE_PHONE] = t('Phone number');
            $labels[self::TYPE_URL] = t('URL');

            self::$typeLabels = $labels;
        }

        $type = $this->getType();

        if(isset(self::$typeLabels[$type]))
        {
            return self::$typeLabels[$type];
        }

        throw new BaseException(
            sprintf('Unknown URL type label for type [%s].', $type),
            null,
            self::ERROR_UNKNOWN_TYPE_FOR_LABEL
        );
    }
491
492
   /**
    * Sets the flag that tells whether excluded parameters should
    * be highlighted in a different color in the URL when using
    * the {@link URLInfo::getHighlighted()} method.
    *
    * @param bool $highlight
    * @return URLInfo
    * @see URLInfo::isHighlightExcludeEnabled()
    */
    public function setHighlightExcluded(bool $highlight=true) : URLInfo
    {
        $instance = $this;
        $instance->highlightExcluded = $highlight;

        return $instance;
    }
505
    
506
   /**
    * Collects the results of the main getters and checks
    * in a single associative array.
    *
    * @return array
    * @throws BaseException Passed on from getTypeLabel() for unknown types.
    */
    public function toArray() : array
    {
        $data = [
            'hasParams' => $this->hasParams(),
            'params' => $this->getParams(),
            'type' => $this->getType(),
            'typeLabel' => $this->getTypeLabel(),
            'normalized' => $this->getNormalized(),
            'highlighted' => $this->getHighlighted(),
            'hash' => $this->getHash(),
            'host' => $this->getHost(),
            'isValid' => $this->isValid(),
            'isURL' => $this->isURL(),
            'isEmail' => $this->isEmail(),
            'isAnchor' => $this->isAnchor(),
            'isPhoneNumber' => $this->isPhoneNumber(),
            'errorMessage' => $this->getErrorMessage(),
            'errorCode' => $this->getErrorCode(),
            // Names only: the exclusion reasons are left out.
            'excludedParams' => array_keys($this->excludedParams),
        ];

        return $data;
    }
532
    
533
    /**
     * Switches parameter exclusion on or off. The list of excluded
     * parameters itself is not modified, which allows toggling
     * between the filtered and the original parameters.
     *
     * While enabled, getParams() and all methods based on it leave
     * out the parameters on the exclusion list. While disabled,
     * the list is ignored.
     *
     * @param bool $enabled
     * @return URLInfo
     * @see URLInfo::isParamExclusionEnabled()
     * @see URLInfo::setHighlightExcluded()
     */
    public function setParamExclusion(bool $enabled=true) : URLInfo
    {
        $instance = $this;
        $instance->paramExclusion = $enabled;

        return $instance;
    }
553
    
554
   /**
    * Whether the parameter exclusion mode is enabled:
    * In this case, if any parameters have been added to the
    * exclusion list, getParams() will leave these out.
    *
    * @return bool
    * @see URLInfo::setParamExclusion()
    */
    public function isParamExclusionEnabled() : bool
    {
        $enabled = $this->paramExclusion;

        return $enabled;
    }
565
    
566
   /**
    * Checks whether the link contains any parameters that
    * are on the list of excluded parameters. This looks at
    * the parsed parameters directly, regardless of whether
    * parameter exclusion is currently enabled.
    *
    * @return bool
    */
    public function containsExcludedParams() : bool
    {
        if(empty($this->excludedParams)) {
            return false;
        }

        // Both arrays are keyed by parameter name.
        $matches = array_intersect_key($this->info['params'], $this->excludedParams);

        return !empty($matches);
    }
587
    
588
    /**
     * Checks whether the parameter is among those returned by
     * getParams(). Excluded parameters are not found while
     * parameter exclusion is enabled.
     *
     * The name is looked up as an array key, so it must match exactly.
     * A loose value comparison would report false positives, e.g.
     * `abc` matching a parameter named `0` on PHP versions before 8.
     *
     * @param string $name
     * @return bool
     * @see URLInfo::getParams()
     */
    public function hasParam(string $name) : bool
    {
        return array_key_exists($name, $this->getParams());
    }
593
594
    /**
     * ArrayAccess: sets one of the known URL components. Offsets
     * that are not in the list of info keys are silently ignored.
     *
     * The offset is compared strictly: with a loose comparison,
     * non-string offsets like `true` would match the list and
     * write unrelated keys into the info array.
     *
     * @param mixed $offset
     * @param mixed $value
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        if(in_array($offset, $this->infoKeys, true)) {
            $this->info[$offset] = $value;
        }
    }
600
    
601
    /**
     * ArrayAccess: checks whether the info array has a non-null
     * value for the offset. This covers every key of the info
     * array, not only the known URL components.
     *
     * @param mixed $offset
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->info[$offset]);
    }
605
    
606
    /**
     * ArrayAccess: removes the offset from the info array.
     *
     * NOTE(review): This is not limited to the known URL components,
     * so entries like `type` or `params`, which other methods read
     * without checking, can be removed as well - confirm this is intended.
     *
     * @param mixed $offset
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        unset($this->info[$offset]);
    }
610
    
611
    /**
     * ArrayAccess: retrieves one of the known URL components.
     *
     * The offset is compared strictly: with a loose comparison,
     * non-string offsets like `true` would match the list and be
     * passed on to getInfoKey(), which only accepts strings.
     *
     * @param mixed $offset
     * @return string|int The port as integer (-1 if none), any other
     *                    known component as string, and an empty string
     *                    for unknown offsets.
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        if($offset === 'port') {
            return $this->getPort();
        }

        if(in_array($offset, $this->infoKeys, true)) {
            return $this->getInfoKey($offset);
        }

        return '';
    }
623
    
624
    /**
     * Retrieves the CSS for highlighted URLs, as provided by
     * {@see URLInfo_Highlighter::getHighlightCSS()}.
     *
     * @return string
     */
    public static function getHighlightCSS() : string
    {
        $css = URLInfo_Highlighter::getHighlightCSS();

        return $css;
    }
628
    
629
    /**
     * Retrieves the list of excluded parameters, regardless of
     * whether parameter exclusion is currently enabled.
     *
     * @return string[] Parameter name => reason pairs.
     * @see URLInfo::excludeParam()
     */
    public function getExcludedParams() : array
    {
        $excluded = $this->excludedParams;

        return $excluded;
    }
633
    
634
    /**
     * Whether highlighting of excluded parameters has been turned on.
     *
     * @return bool
     * @see URLInfo::setHighlightExcluded()
     */
    public function isHighlightExcludeEnabled() : bool
    {
        $enabled = $this->highlightExcluded;

        return $enabled;
    }
638
    
639
   /**
    * Checks if the URL exists, i.e. can be connected to. Will return
    * true if the returned HTTP status code is `200` or `302`.
    *
    * Redirects are followed, so the status code that is checked is
    * the one cURL reports after following them. The request times
    * out after 10 seconds. If the request fails, cURL is expected
    * to report the code `0`, which results in `false`.
    *
    * NOTE: If the target URL requires HTTP authentication, the username
    * and password should be integrated into the URL.
    *
    * @param bool $verifySSL Whether to verify the SSL host and peer. Turn off to accept invalid or self-signed certificates.
    * @return bool
    * @throws BaseException If the cURL handle cannot be created.
    *
    * @see URLInfo::ERROR_CURL_INIT_FAILED
    */
    public function tryConnect(bool $verifySSL=true) : bool
    {
        requireCURL();

        $ch = curl_init();

        // curl_init() returns a resource up to PHP 7, and a CurlHandle
        // object from PHP 8 on: checking with is_resource() would fail
        // for every valid handle there. Failure is `false` in both cases.
        if($ch === false)
        {
            throw new BaseException(
                'Could not initialize a new cURL instance.',
                'Calling curl_init returned false. Additional information is not available.',
                self::ERROR_CURL_INIT_FAILED
            );
        }

        curl_setopt($ch, CURLOPT_URL, $this->getNormalized());
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        if(!$verifySSL)
        {
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        }

        if($this->hasUsername())
        {
            curl_setopt($ch, CURLOPT_USERNAME, $this->getUsername());
            curl_setopt($ch, CURLOPT_PASSWORD, $this->getPassword());
        }

        // The response body is not needed, only the status code.
        curl_exec($ch);

        $http_code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);

        curl_close($ch);

        return ($http_code === 200) || ($http_code === 302);
    }
690
}
691