Passed
Push — master ( 58b85c...be6392 )
by Sebastian
03:06
created

URLInfo::parse()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 2
c 2
b 0
f 0
dl 0
loc 4
rs 10
cc 1
nc 1
nop 0
1
<?php
/**
 * File containing the {@see AppUtils\URLInfo} class.
 *
 * @package Application Utils
 * @subpackage URLInfo
 * @see AppUtils\URLInfo
 */

declare(strict_types=1);

namespace AppUtils;

/**
 * Replacement for PHP's native `parse_url` function, which
 * handles some common pitfalls and issues that are hard to
 * follow, as well as adding a number of utility methods.
 *
 * The parsed URL components can also be accessed with array
 * syntax (`$info['host']`), limited to the component names
 * listed in {@see URLInfo::$infoKeys}.
 *
 * @package Application Utils
 * @subpackage URLInfo
 * @author Sebastian Mordziol <[email protected]>
 */
class URLInfo implements \ArrayAccess
{
    public const ERROR_MISSING_SCHEME = 42101;
    public const ERROR_INVALID_SCHEME = 42102;
    public const ERROR_MISSING_HOST = 42103;
    public const ERROR_CANNOT_FIND_CSS_FOLDER = 42104;
    public const ERROR_UNKNOWN_TYPE_FOR_LABEL = 42105;
    public const ERROR_CURL_INIT_FAILED = 42106;

    public const TYPE_EMAIL = 'email';
    public const TYPE_FRAGMENT = 'fragment';
    public const TYPE_PHONE = 'phone';
    public const TYPE_URL = 'url';

   /**
    * The original URL that was passed to the constructor.
    * @var string
    */
    protected $rawURL;

   /**
    * The parsed URL information, as returned by the parser's
    * `getInfo()` method. This class reads the URL component keys
    * listed in {@see URLInfo::$infoKeys}, plus `type` and `params`.
    *
    * @var array
    */
    protected $info;

   /**
    * Excluded parameters, as parameter name => reason pairs.
    *
    * @var string[]
    * @see URLInfo::excludeParam()
    */
    protected $excludedParams = [];

   /**
    * @var bool
    * @see URLInfo::setParamExclusion()
    */
    protected $paramExclusion = false;

   /**
    * Lazily built map of type identifier => human readable label.
    *
    * @var array
    * @see URLInfo::getTypeLabel()
    */
    protected static $typeLabels;

   /**
    * @var bool
    * @see URLInfo::setHighlightExcluded()
    */
    protected $highlightExcluded = false;

   /**
    * Names of the URL components that may be read, written and
    * removed through the array access interface.
    *
    * @var array
    */
    protected $infoKeys = [
        'scheme',
        'host',
        'port',
        'user',
        'pass',
        'path',
        'query',
        'fragment',
    ];

   /**
    * The filtered URL that is handed to the parser.
    *
    * @var string
    * @see URLInfo::filterURL()
    */
    protected $url;

   /**
    * @var URLInfo_Parser
    */
    protected $parser;

   /**
    * Created on demand the first time a normalized URL is requested.
    *
    * @var URLInfo_Normalizer
    */
    protected $normalizer;

   /**
    * @var bool
    * @see URLInfo::setUTFEncoding()
    */
    protected $encodeUTFChars = false;

   /**
    * Stores the raw URL, filters it and parses the filtered result.
    *
    * @param string $url
    */
    public function __construct(string $url)
    {
        $this->rawURL = $url;
        $this->url = self::filterURL($url);

        $this->parse();
    }

   /**
    * (Re-)parses the filtered URL with a fresh parser instance.
    *
    * NOTE: This replaces the whole info array, so any changes made
    * beforehand via {@see URLInfo::setParam()}, {@see URLInfo::removeParam()}
    * or array access writes are discarded.
    */
    protected function parse() : void
    {
        $this->parser = new URLInfo_Parser($this->url, $this->encodeUTFChars);
        $this->info = $this->parser->getInfo();
    }

   /**
    * Whether to URL encode any non-encoded UTF8 characters in the URL.
    * Default is to leave them as-is for better readability, since
    * browsers handle this well.
    *
    * NOTE: Changing the setting re-parses the URL, which resets
    * any manual modifications of the parsed information.
    *
    * @param bool $enabled
    * @return URLInfo
    */
    public function setUTFEncoding(bool $enabled=true) : URLInfo
    {
        if($this->encodeUTFChars === $enabled) {
            return $this;
        }

        $this->encodeUTFChars = $enabled;
        $this->parse(); // re-parse the URL to apply the changes

        return $this;
    }

   /**
    * Whether the URL encoding of UTF8 characters is enabled.
    *
    * @return bool
    */
    public function isUTFEncodingEnabled() : bool
    {
        return $this->encodeUTFChars;
    }

   /**
    * Filters an URL: removes control characters and the
    * like to have a clean URL to work with.
    *
    * @param string $url
    * @return string
    */
    public static function filterURL(string $url)
    {
        return URLInfo_Filter::filter($url);
    }

   /**
    * Checks if it is an https link.
    *
    * @return bool
    */
    public function isSecure() : bool
    {
        return $this->getScheme() === 'https';
    }

   /**
    * Whether the detected type is {@see URLInfo::TYPE_FRAGMENT}.
    *
    * @return bool
    */
    public function isAnchor() : bool
    {
        return $this->info['type'] === self::TYPE_FRAGMENT;
    }

   /**
    * Whether the detected type is {@see URLInfo::TYPE_EMAIL}.
    *
    * @return bool
    */
    public function isEmail() : bool
    {
        return $this->info['type'] === self::TYPE_EMAIL;
    }

   /**
    * Whether the detected type is {@see URLInfo::TYPE_PHONE}.
    *
    * @return bool
    */
    public function isPhoneNumber() : bool
    {
        return $this->info['type'] === self::TYPE_PHONE;
    }

   /**
    * Whether the URL is a regular URL, not one of the
    * other types like a phone number or email address.
    * This is determined by the presence of a host name.
    *
    * @return bool
    */
    public function isURL() : bool
    {
        // Compare against the empty string instead of using empty(),
        // which would treat the valid host "0" as missing.
        return $this->hasHost();
    }

   /**
    * Whether the parser considers the URL valid.
    *
    * @return bool
    * @see URLInfo::getErrorMessage()
    * @see URLInfo::getErrorCode()
    */
    public function isValid() : bool
    {
        return $this->parser->isValid();
    }

   /**
    * Retrieves the host name, or an empty string if none is present.
    *
    * @return string
    */
    public function getHost() : string
    {
        return $this->getInfoKey('host');
    }

   /**
    * Retrieves the path, or an empty string if none is present.
    *
    * @return string
    */
    public function getPath() : string
    {
        return $this->getInfoKey('path');
    }

   /**
    * Retrieves the fragment, or an empty string if none is present.
    *
    * @return string
    */
    public function getFragment() : string
    {
        return $this->getInfoKey('fragment');
    }

   /**
    * Retrieves the scheme, or an empty string if none is present.
    *
    * @return string
    */
    public function getScheme() : string
    {
        return $this->getInfoKey('scheme');
    }

   /**
    * Retrieves the port specified in the URL, or -1 if none is present.
    *
    * @return int
    */
    public function getPort() : int
    {
        $port = $this->getInfoKey('port');

        // An empty value, including "0", counts as "no port".
        if(empty($port)) {
            return -1;
        }

        return (int)$port;
    }

   /**
    * Retrieves the raw query string, or an empty string if none is present.
    *
    * @return string
    *
    * @see URLInfo::getParams()
    */
    public function getQuery() : string
    {
        return $this->getInfoKey('query');
    }

   /**
    * Retrieves the user name, or an empty string if none is present.
    *
    * @return string
    */
    public function getUsername() : string
    {
        return $this->getInfoKey('user');
    }

   /**
    * Retrieves the password, or an empty string if none is present.
    *
    * @return string
    */
    public function getPassword() : string
    {
        return $this->getInfoKey('pass');
    }

   /**
    * Whether the URL contains a port number.
    *
    * @return bool
    */
    public function hasPort() : bool
    {
        return $this->getPort() !== -1;
    }

   /**
    * Alias for the hasParams() method.
    *
    * @return bool
    * @see URLInfo::hasParams()
    */
    public function hasQuery() : bool
    {
        return $this->hasParams();
    }

   /**
    * Whether a host name is present.
    *
    * @return bool
    */
    public function hasHost() : bool
    {
        return $this->getHost() !== '';
    }

   /**
    * Whether a path is present.
    *
    * @return bool
    */
    public function hasPath() : bool
    {
        return $this->getPath() !== '';
    }

   /**
    * Whether a fragment is present.
    *
    * @return bool
    */
    public function hasFragment() : bool
    {
        return $this->getFragment() !== '';
    }

   /**
    * Whether a user name is present.
    *
    * @return bool
    */
    public function hasUsername() : bool
    {
        return $this->getUsername() !== '';
    }

   /**
    * Whether a password is present.
    *
    * @return bool
    */
    public function hasPassword() : bool
    {
        return $this->getPassword() !== '';
    }

   /**
    * Whether a scheme is present.
    *
    * @return bool
    */
    public function hasScheme() : bool
    {
        return $this->getScheme() !== '';
    }

   /**
    * Retrieves an entry of the parsed information as a string.
    *
    * @param string $name
    * @return string The value cast to string, or an empty string if the key is not set.
    */
    protected function getInfoKey(string $name) : string
    {
        if(!isset($this->info[$name])) {
            return '';
        }

        return (string)$this->info[$name];
    }

   /**
    * Retrieves a normalized URL: this ensures that all parameters
    * in the URL are always in the same order.
    *
    * @return string Will return an empty string if the URL is not valid.
    */
    public function getNormalized() : string
    {
        return $this->normalize(true);
    }

   /**
    * Like getNormalized(), but if a username and password are present
    * in the URL, returns the URL without them.
    *
    * @return string Will return an empty string if the URL is not valid.
    */
    public function getNormalizedWithoutAuth() : string
    {
        return $this->normalize(false);
    }

   /**
    * Runs the normalizer, creating it on first use.
    *
    * @param bool $auth Passed on to the normalizer's `enableAuth()` method.
    * @return string An empty string if the URL is not valid.
    */
    protected function normalize(bool $auth=true) : string
    {
        if(!$this->isValid()) {
            return '';
        }

        if(!isset($this->normalizer)) {
            $this->normalizer = new URLInfo_Normalizer($this);
        }

        $this->normalizer->enableAuth($auth);

        return $this->normalizer->normalize();
    }

   /**
    * Creates a hash of the URL, which can be used for comparisons.
    * Since any parameters in the URL's query are sorted alphabetically,
    * the same links with a different parameter order will have the
    * same hash.
    *
    * @return string
    */
    public function getHash()
    {
        return \AppUtils\ConvertHelper::string2shortHash($this->getNormalized());
    }

   /**
    * Highlights the URL using HTML tags with specific highlighting
    * class names.
    *
    * @return string Will return an empty string if the URL is not valid.
    */
    public function getHighlighted() : string
    {
        if(!$this->isValid()) {
            return '';
        }

        $highlighter = new URLInfo_Highlighter($this);

        return $highlighter->highlight();
    }

   /**
    * Retrieves the error message reported by the parser.
    *
    * @return string
    */
    public function getErrorMessage() : string
    {
        return $this->parser->getErrorMessage();
    }

   /**
    * Retrieves the error code reported by the parser.
    *
    * @return int
    */
    public function getErrorCode() : int
    {
        return $this->parser->getErrorCode();
    }

   /**
    * Whether the URL has any parameters, honoring the
    * parameter exclusion like {@see URLInfo::getParams()}.
    *
    * @return bool
    */
    public function hasParams() : bool
    {
        return count($this->getParams()) > 0;
    }

   /**
    * Counts the parameters, honoring the parameter
    * exclusion like {@see URLInfo::getParams()}.
    *
    * @return int
    */
    public function countParams() : int
    {
        return count($this->getParams());
    }

   /**
    * Retrieves all parameters specified in the url,
    * if any, as an associative array.
    *
    * NOTE: Ignores parameters that have been added
    * to the excluded parameters list, as long as the
    * parameter exclusion is enabled.
    *
    * @return array
    */
    public function getParams() : array
    {
        if(!$this->paramExclusion || empty($this->excludedParams)) {
            return $this->info['params'];
        }

        // Keeps order and keys of the remaining parameters.
        return array_diff_key($this->info['params'], $this->excludedParams);
    }

   /**
    * Retrieves the names of all parameters present in the URL, if any.
    *
    * @return string[]
    */
    public function getParamNames() : array
    {
        return array_keys($this->getParams());
    }

   /**
    * Retrieves a specific parameter value from the URL.
    *
    * NOTE: Unlike {@see URLInfo::getParams()}, this does not
    * take the excluded parameters list into account.
    *
    * @param string $name
    * @return string The parameter value, or an empty string if it does not exist.
    */
    public function getParam(string $name) : string
    {
        if(isset($this->info['params'][$name])) {
            return $this->info['params'][$name];
        }

        return '';
    }

   /**
    * Excludes an URL parameter entirely if present:
    * the parser will act as if the parameter was not
    * even present in the source URL, effectively
    * stripping it.
    *
    * Adding a new exclusion also turns the parameter
    * exclusion mode on.
    *
    * @param string $name
    * @param string $reason A human readable explanation why this is excluded - used when highlighting links.
    * @return URLInfo
    */
    public function excludeParam(string $name, string $reason) : URLInfo
    {
        if(isset($this->excludedParams[$name])) {
            return $this;
        }

        $this->excludedParams[$name] = $reason;
        $this->setParamExclusion();

        return $this;
    }

   /**
    * Retrieves a string identifier of the type of URL that was detected.
    *
    * @return string
    *
    * @see URLInfo::TYPE_EMAIL
    * @see URLInfo::TYPE_FRAGMENT
    * @see URLInfo::TYPE_PHONE
    * @see URLInfo::TYPE_URL
    */
    public function getType() : string
    {
        return $this->info['type'];
    }

   /**
    * Retrieves the human readable, translated label
    * for the detected URL type.
    *
    * @return string
    * @throws BaseException If no label exists for the type ({@see URLInfo::ERROR_UNKNOWN_TYPE_FOR_LABEL}).
    */
    public function getTypeLabel() : string
    {
        // The labels are shared by all instances and only built once.
        if(!isset(self::$typeLabels))
        {
            self::$typeLabels = [
                self::TYPE_EMAIL => t('Email'),
                self::TYPE_FRAGMENT => t('Jump mark'),
                self::TYPE_PHONE => t('Phone number'),
                self::TYPE_URL => t('URL'),
            ];
        }

        $type = $this->getType();

        if(!isset(self::$typeLabels[$type]))
        {
            throw new BaseException(
                sprintf('Unknown URL type label for type [%s].', $type),
                null,
                self::ERROR_UNKNOWN_TYPE_FOR_LABEL
            );
        }

        return self::$typeLabels[$type];
    }

   /**
    * Whether excluded parameters should be highlighted in
    * a different color in the URL when using the
    * {@link URLInfo::getHighlighted()} method.
    *
    * @param bool $highlight
    * @return URLInfo
    */
    public function setHighlightExcluded(bool $highlight=true) : URLInfo
    {
        $this->highlightExcluded = $highlight;
        return $this;
    }

   /**
    * Returns an array with all relevant URL information.
    *
    * @return array
    * @throws BaseException See {@see URLInfo::getTypeLabel()}.
    */
    public function toArray() : array
    {
        return [
            'hasParams' => $this->hasParams(),
            'params' => $this->getParams(),
            'type' => $this->getType(),
            'typeLabel' => $this->getTypeLabel(),
            'normalized' => $this->getNormalized(),
            'highlighted' => $this->getHighlighted(),
            'hash' => $this->getHash(),
            'host' => $this->getHost(),
            'isValid' => $this->isValid(),
            'isURL' => $this->isURL(),
            'isEmail' => $this->isEmail(),
            'isAnchor' => $this->isAnchor(),
            'isPhoneNumber' => $this->isPhoneNumber(),
            'errorMessage' => $this->getErrorMessage(),
            'errorCode' => $this->getErrorCode(),
            'excludedParams' => array_keys($this->excludedParams),
        ];
    }

   /**
    * Enable or disable parameter exclusion: if any parameters
    * to exclude have been added, this allows switching between
    * both modes. When enabled, methods like getNormalized or
    * getHighlighted will exclude any parameters to exclude. When
    * disabled, it will act as usual.
    *
    * This allows adding parameters to exclude, but still have
    * access to the original URLs.
    *
    * @param bool $enabled
    * @return URLInfo
    * @see URLInfo::isParamExclusionEnabled()
    * @see URLInfo::setHighlightExcluded()
    */
    public function setParamExclusion(bool $enabled=true) : URLInfo
    {
        $this->paramExclusion = $enabled;
        return $this;
    }

   /**
    * Whether the parameter exclusion mode is enabled:
    * In this case, if any parameters have been added to the
    * exclusion list, all relevant methods will exclude these.
    *
    * @return bool
    */
    public function isParamExclusionEnabled() : bool
    {
        return $this->paramExclusion;
    }

   /**
    * Checks whether the link contains any parameters that
    * are on the list of excluded parameters. This does not
    * depend on the exclusion mode being enabled.
    *
    * @return bool
    */
    public function containsExcludedParams() : bool
    {
        if(empty($this->excludedParams)) {
            return false;
        }

        $matches = array_intersect_key($this->info['params'], $this->excludedParams);

        return count($matches) > 0;
    }

   /**
    * Whether the URL has the specified parameter, honoring the
    * parameter exclusion like {@see URLInfo::getParams()}.
    *
    * @param string $name
    * @return bool
    */
    public function hasParam(string $name) : bool
    {
        // Key lookup instead of a loose in_array() on the names:
        // numeric parameter names are stored as integer keys, which
        // a loose comparison matches against arbitrary strings on PHP 7.
        return array_key_exists($name, $this->getParams());
    }

   /**
    * Array access: sets an URL component. Offsets that are not
    * listed in {@see URLInfo::$infoKeys} are silently ignored.
    *
    * @param mixed $offset
    * @param mixed $value
    */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        if(in_array($offset, $this->infoKeys, true)) {
            $this->info[$offset] = $value;
        }
    }

   /**
    * Array access: whether the offset is set in the parsed information.
    *
    * @param mixed $offset
    * @return bool
    */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->info[$offset]);
    }

   /**
    * Array access: removes an URL component. Like in offsetSet(),
    * only the offsets listed in {@see URLInfo::$infoKeys} can be
    * removed, so that the internal `type` and `params` entries the
    * other methods rely on stay intact.
    *
    * @param mixed $offset
    */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        if(in_array($offset, $this->infoKeys, true)) {
            unset($this->info[$offset]);
        }
    }

   /**
    * Array access: retrieves an URL component.
    *
    * @param mixed $offset
    * @return int|string The port as integer (-1 if none), any other
    *                    known component as string, or an empty string
    *                    for unknown offsets.
    */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        if($offset === 'port') {
            return $this->getPort();
        }

        if(in_array($offset, $this->infoKeys, true)) {
            return $this->getInfoKey($offset);
        }

        return '';
    }

   /**
    * Retrieves the CSS styles provided by the highlighter class.
    *
    * @return string
    */
    public static function getHighlightCSS() : string
    {
        return URLInfo_Highlighter::getHighlightCSS();
    }

   /**
    * Retrieves the excluded parameters, as name => reason pairs.
    *
    * @return array
    */
    public function getExcludedParams() : array
    {
        return $this->excludedParams;
    }

   /**
    * Whether highlighting of excluded parameters is enabled.
    *
    * @return bool
    * @see URLInfo::setHighlightExcluded()
    */
    public function isHighlightExcludeEnabled() : bool
    {
        return $this->highlightExcluded;
    }

   /**
    * Checks if the URL exists, i.e. can be connected to. Will return
    * true if the returned HTTP status code is `200` or `302`.
    *
    * NOTE: If the target URL requires HTTP authentication, the username
    * and password should be integrated into the URL.
    *
    * @param bool $verifySSL Passed on to the connection tester's `setVerifySSL()` method.
    * @return bool
    * @throws BaseException
    */
    public function tryConnect(bool $verifySSL=true) : bool
    {
        $tester = $this->createConnectionTester();
        $tester->setVerifySSL($verifySSL);

        return $tester->canConnect();
    }

   /**
    * Creates the connection tester instance that is used
    * to check if a URL can be connected to, and which is
    * used in the {@see URLInfo::tryConnect()} method. It
    * allows more settings to be used.
    *
    * @return URLInfo_ConnectionTester
    */
    public function createConnectionTester() : URLInfo_ConnectionTester
    {
        return new URLInfo_ConnectionTester($this);
    }

   /**
    * Adds/overwrites an URL parameter.
    *
    * @param string $name
    * @param string $val
    * @return URLInfo
    */
    public function setParam(string $name, string $val) : URLInfo
    {
        $this->info['params'][$name] = $val;

        return $this;
    }

   /**
    * Removes an URL parameter. Has no effect if the
    * parameter is not present to begin with.
    *
    * @param string $param
    * @return URLInfo
    */
    public function removeParam(string $param) : URLInfo
    {
        // unset() is a no-op for missing keys, no check required.
        unset($this->info['params'][$param]);

        return $this;
    }
}
729