Passed
Push — master ( 61985e...8db7dc )
by Sebastian
04:44
created

URLInfo::parse()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 2
c 2
b 0
f 0
dl 0
loc 4
rs 10
cc 1
nc 1
nop 0
1
<?php
2
/**
3
 * File containing the {@see AppUtils\URLInfo} class.
4
 * 
5
 * @package Application Utils
6
 * @subpackage URLInfo
7
 * @see AppUtils\URLInfo
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
/**
15
 * Replacement for PHP's native `parse_url` function, which
16
 * handles some common pitfalls and issues that are hard to 
17
 * follow, as well as adding a number of utility methods.
18
 * 
19
 * @package Application Utils
20
 * @subpackage URLInfo
21
 * @author Sebastian Mordziol <[email protected]>
22
 */
23
class URLInfo implements \ArrayAccess
{
    // Error codes. Only ERROR_UNKNOWN_TYPE_FOR_LABEL and ERROR_CURL_INIT_FAILED
    // are thrown from within this class; the others are not referenced here.
    public const ERROR_MISSING_SCHEME = 42101;

    public const ERROR_INVALID_SCHEME = 42102;

    public const ERROR_MISSING_HOST = 42103;

    public const ERROR_CANNOT_FIND_CSS_FOLDER = 42104;

    public const ERROR_UNKNOWN_TYPE_FOR_LABEL = 42105;

    public const ERROR_CURL_INIT_FAILED = 42106;

    // Identifiers for the detected kind of URL, see getType().
    public const TYPE_EMAIL = 'email';
    public const TYPE_FRAGMENT = 'fragment';
    public const TYPE_PHONE = 'phone';
    public const TYPE_URL = 'url';

    /**
     * The original URL that was passed to the constructor.
     * @var string
     */
    protected $rawURL;

    /**
     * The parsed URL information, as returned by the parser.
     * This class reads the `type` and `params` keys from it,
     * in addition to the keys listed in {@see URLInfo::$infoKeys}.
     *
     * @var array
     */
    protected $info;

    /**
     * Excluded parameters, stored as name => reason pairs.
     * @var string[]
     * @see URLInfo::excludeParam()
     */
    protected $excludedParams = array();

    /**
     * @var bool
     * @see URLInfo::setParamExclusion()
     */
    protected $paramExclusion = false;

    /**
     * Lazily built map of type identifier => human readable label.
     * @var array
     * @see URLInfo::getTypeLabel()
     */
    protected static $typeLabels;

    /**
     * @var bool
     * @see URLInfo::setHighlightExcluded()
     */
    protected $highlightExcluded = false;

    /**
     * The info keys that can be accessed via the array syntax.
     * @var array
     */
    protected $infoKeys = array(
        'scheme',
        'host',
        'port',
        'user',
        'pass',
        'path',
        'query',
        'fragment'
    );

    /**
     * The filtered URL that is handed to the parser.
     * @var string
     */
    protected $url;

    /**
     * @var URLInfo_Parser
     */
    protected $parser;

    /**
     * Created on demand the first time a normalized URL is requested.
     * @var URLInfo_Normalizer
     */
    protected $normalizer;

    /**
     * @var bool
     * @see URLInfo::setUTFEncoding()
     */
    protected $encodeUTFChars = false;

    /**
     * Stores the raw URL, filters it and parses the filtered result.
     *
     * @param string $url
     */
    public function __construct(string $url)
    {
        $this->rawURL = $url;
        $this->url = self::filterURL($url);

        $this->parse();
    }

    /**
     * (Re-)creates the parser for the filtered URL using the current
     * UTF encoding setting, and stores the information it returns.
     */
    protected function parse() : void
    {
        $this->parser = new URLInfo_Parser($this->url, $this->encodeUTFChars);
        $this->info = $this->parser->getInfo();
    }

    /**
     * Whether to URL encode any non-encoded UTF8 characters in the URL.
     * Default is to leave them as-is for better readability, since
     * browsers handle this well.
     *
     * @param bool $enabled
     * @return URLInfo
     */
    public function setUTFEncoding(bool $enabled=true) : URLInfo
    {
        if($this->encodeUTFChars !== $enabled)
        {
            $this->encodeUTFChars = $enabled;
            $this->parse(); // re-parse the URL to apply the changes
        }

        return $this;
    }

    /**
     * Whether the URL encoding of UTF8 characters is enabled.
     *
     * @return bool
     * @see URLInfo::setUTFEncoding()
     */
    public function isUTFEncodingEnabled() : bool
    {
        return $this->encodeUTFChars;
    }

    /**
     * Filters an URL: removes control characters and the
     * like to have a clean URL to work with.
     *
     * @param string $url
     * @return string
     */
    public static function filterURL(string $url)
    {
        return URLInfo_Filter::filter($url);
    }

    /**
     * Checks if it is an https link.
     * @return bool
     */
    public function isSecure() : bool
    {
        return isset($this->info['scheme']) && $this->info['scheme'] === 'https';
    }

    /**
     * Whether the detected type is a fragment (jump mark).
     * @return bool
     */
    public function isAnchor() : bool
    {
        return $this->info['type'] === self::TYPE_FRAGMENT;
    }

    /**
     * Whether the detected type is an email address.
     * @return bool
     */
    public function isEmail() : bool
    {
        return $this->info['type'] === self::TYPE_EMAIL;
    }

    /**
     * Whether the detected type is a phone number.
     * @return bool
     */
    public function isPhoneNumber() : bool
    {
        return $this->info['type'] === self::TYPE_PHONE;
    }

    /**
     * Whether the URL is a regular URL, not one of the
     * other types like a phone number or email address.
     *
     * NOTE: This is determined by the presence of a host name,
     * not by the detected type.
     *
     * @return bool
     */
    public function isURL() : bool
    {
        $host = $this->getHost();
        return !empty($host);
    }

    /**
     * Whether the parser considers the URL valid.
     * @return bool
     */
    public function isValid() : bool
    {
        return $this->parser->isValid();
    }

    /**
     * Retrieves the host name, or an empty string if none is present.
     *
     * @return string
     */
    public function getHost() : string
    {
        return $this->getInfoKey('host');
    }

    /**
     * Retrieves the path, or an empty string if none is present.
     * @return string
     */
    public function getPath() : string
    {
        return $this->getInfoKey('path');
    }

    /**
     * Retrieves the fragment, or an empty string if none is present.
     * @return string
     */
    public function getFragment() : string
    {
        return $this->getInfoKey('fragment');
    }

    /**
     * Retrieves the scheme, or an empty string if none is present.
     * @return string
     */
    public function getScheme() : string
    {
        return $this->getInfoKey('scheme');
    }

    /**
     * Retrieves the port specified in the URL, or -1 if none is present.
     * @return int
     */
    public function getPort() : int
    {
        $port = $this->getInfoKey('port');

        // NOTE: empty() also treats the port "0" as not present.
        if(!empty($port)) {
            return (int)$port;
        }

        return -1;
    }

    /**
     * Retrieves the raw query string, or an empty string if none is present.
     *
     * @return string
     *
     * @see URLInfo::getParams()
     */
    public function getQuery() : string
    {
        return $this->getInfoKey('query');
    }

    /**
     * Retrieves the user name, or an empty string if none is present.
     * @return string
     */
    public function getUsername() : string
    {
        return $this->getInfoKey('user');
    }

    /**
     * Retrieves the password, or an empty string if none is present.
     * @return string
     */
    public function getPassword() : string
    {
        return $this->getInfoKey('pass');
    }

    /**
     * Whether the URL contains a port number.
     * @return bool
     */
    public function hasPort() : bool
    {
        return $this->getPort() !== -1;
    }

    /**
     * Alias for the hasParams() method.
     * @return bool
     * @see URLInfo::hasParams()
     */
    public function hasQuery() : bool
    {
        return $this->hasParams();
    }

    /**
     * @return bool
     */
    public function hasHost() : bool
    {
        return $this->getHost() !== '';
    }

    /**
     * @return bool
     */
    public function hasPath() : bool
    {
        return $this->getPath() !== '';
    }

    /**
     * @return bool
     */
    public function hasFragment() : bool
    {
        return $this->getFragment() !== '';
    }

    /**
     * @return bool
     */
    public function hasUsername() : bool
    {
        return $this->getUsername() !== '';
    }

    /**
     * @return bool
     */
    public function hasPassword() : bool
    {
        return $this->getPassword() !== '';
    }

    /**
     * @return bool
     */
    public function hasScheme() : bool
    {
        return $this->getScheme() !== '';
    }

    /**
     * Retrieves an entry of the parsed information as a string.
     *
     * @param string $name
     * @return string The value cast to string, or an empty string if the key is not set.
     */
    protected function getInfoKey(string $name) : string
    {
        if(isset($this->info[$name])) {
            return (string)$this->info[$name];
        }

        return '';
    }

    /**
     * Retrieves a normalized URL: this ensures that all parameters
     * in the URL are always in the same order.
     *
     * @return string Will return an empty string if the URL is not valid.
     */
    public function getNormalized() : string
    {
        return $this->normalize(true);
    }

    /**
     * Like getNormalized(), but if a username and password are present
     * in the URL, returns the URL without them.
     *
     * @return string Will return an empty string if the URL is not valid.
     */
    public function getNormalizedWithoutAuth() : string
    {
        return $this->normalize(false);
    }

    /**
     * Normalizes the URL using the normalizer, which is created
     * on first use and reused afterwards.
     *
     * @param bool $auth Passed on to the normalizer's enableAuth() method.
     * @return string Will return an empty string if the URL is not valid.
     */
    protected function normalize(bool $auth=true) : string
    {
        if(!$this->isValid()) {
            return '';
        }

        if(!isset($this->normalizer)) {
            $this->normalizer = new URLInfo_Normalizer($this);
        }

        $this->normalizer->enableAuth($auth);

        return $this->normalizer->normalize();
    }

    /**
     * Creates a hash of the URL, which can be used for comparisons.
     * Since any parameters in the URL's query are sorted alphabetically,
     * the same links with a different parameter order will have the
     * same hash.
     *
     * @return string
     */
    public function getHash()
    {
        return \AppUtils\ConvertHelper::string2shortHash($this->getNormalized());
    }

    /**
     * Highlights the URL using HTML tags with specific highlighting
     * class names.
     *
     * @return string Will return an empty string if the URL is not valid.
     */
    public function getHighlighted() : string
    {
        if(!$this->isValid()) {
            return '';
        }

        $highlighter = new URLInfo_Highlighter($this);

        return $highlighter->highlight();
    }

    /**
     * Retrieves the error message reported by the parser.
     * @return string
     */
    public function getErrorMessage() : string
    {
        return $this->parser->getErrorMessage();
    }

    /**
     * Retrieves the error code reported by the parser.
     * @return int
     */
    public function getErrorCode() : int
    {
        return $this->parser->getErrorCode();
    }

    /**
     * Whether the URL has any parameters, after applying
     * the parameter exclusion (if enabled).
     *
     * @return bool
     * @see URLInfo::getParams()
     */
    public function hasParams() : bool
    {
        $params = $this->getParams();
        return !empty($params);
    }

    /**
     * Counts the parameters, after applying the parameter
     * exclusion (if enabled).
     *
     * @return int
     * @see URLInfo::getParams()
     */
    public function countParams() : int
    {
        $params = $this->getParams();
        return count($params);
    }

    /**
     * Retrieves all parameters specified in the url,
     * if any, as an associative array.
     *
     * NOTE: Ignores parameters that have been added
     * to the excluded parameters list, as long as the
     * parameter exclusion is enabled.
     *
     * @return array
     */
    public function getParams() : array
    {
        if(!$this->paramExclusion || empty($this->excludedParams)) {
            return $this->info['params'];
        }

        // Keeps only the parameters whose name is not a key of the
        // exclusion list, preserving their original order.
        return array_diff_key($this->info['params'], $this->excludedParams);
    }

    /**
     * Retrieves the names of all parameters present in the URL, if any.
     * @return string[]
     */
    public function getParamNames() : array
    {
        $params = $this->getParams();
        return array_keys($params);
    }

    /**
     * Retrieves a specific parameter value from the URL.
     *
     * NOTE: Reads the parsed parameters directly, so the
     * excluded parameters list is not applied here.
     *
     * @param string $name
     * @return string The parameter value, or an empty string if it does not exist.
     */
    public function getParam(string $name) : string
    {
        if(isset($this->info['params'][$name])) {
            return $this->info['params'][$name];
        }

        return '';
    }

    /**
     * Excludes an URL parameter entirely if present:
     * the parser will act as if the parameter was not
     * even present in the source URL, effectively
     * stripping it.
     *
     * NOTE: Adding a new parameter to the list turns
     * the parameter exclusion on.
     *
     * @param string $name
     * @param string $reason A human readable explanation why this is excluded - used when highlighting links.
     * @return URLInfo
     */
    public function excludeParam(string $name, string $reason) : URLInfo
    {
        if(!isset($this->excludedParams[$name]))
        {
            $this->excludedParams[$name] = $reason;
            $this->setParamExclusion();
        }

        return $this;
    }

    /**
     * Retrieves a string identifier of the type of URL that was detected.
     *
     * @return string
     *
     * @see URLInfo::TYPE_EMAIL
     * @see URLInfo::TYPE_FRAGMENT
     * @see URLInfo::TYPE_PHONE
     * @see URLInfo::TYPE_URL
     */
    public function getType() : string
    {
        return $this->info['type'];
    }

    /**
     * Retrieves the human readable label for the detected URL type.
     *
     * @return string
     * @throws BaseException If no label exists for the detected type.
     *
     * @see URLInfo::ERROR_UNKNOWN_TYPE_FOR_LABEL
     */
    public function getTypeLabel() : string
    {
        // The labels are built once and shared by all instances.
        if(!isset(self::$typeLabels))
        {
            self::$typeLabels = array(
                self::TYPE_EMAIL => t('Email'),
                self::TYPE_FRAGMENT => t('Jump mark'),
                self::TYPE_PHONE => t('Phone number'),
                self::TYPE_URL => t('URL'),
            );
        }

        $type = $this->getType();

        if(!isset(self::$typeLabels[$type]))
        {
            throw new BaseException(
                sprintf('Unknown URL type label for type [%s].', $type),
                null,
                self::ERROR_UNKNOWN_TYPE_FOR_LABEL
            );
        }

        return self::$typeLabels[$type];
    }

    /**
     * Whether excluded parameters should be highlighted in
     * a different color in the URL when using the
     * {@link URLInfo::getHighlighted()} method.
     *
     * @param bool $highlight
     * @return URLInfo
     */
    public function setHighlightExcluded(bool $highlight=true) : URLInfo
    {
        $this->highlightExcluded = $highlight;
        return $this;
    }

    /**
     * Returns an array with all relevant URL information.
     *
     * @return array
     * @throws BaseException If no label exists for the detected type.
     */
    public function toArray() : array
    {
        return array(
            'hasParams' => $this->hasParams(),
            'params' => $this->getParams(),
            'type' => $this->getType(),
            'typeLabel' => $this->getTypeLabel(),
            'normalized' => $this->getNormalized(),
            'highlighted' => $this->getHighlighted(),
            'hash' => $this->getHash(),
            'host' => $this->getHost(),
            'isValid' => $this->isValid(),
            'isURL' => $this->isURL(),
            'isEmail' => $this->isEmail(),
            'isAnchor' => $this->isAnchor(),
            'isPhoneNumber' => $this->isPhoneNumber(),
            'errorMessage' => $this->getErrorMessage(),
            'errorCode' => $this->getErrorCode(),
            'excludedParams' => array_keys($this->excludedParams)
        );
    }

    /**
     * Enable or disable parameter exclusion: if any parameters
     * to exclude have been added, this allows switching between
     * both modes. When enabled, methods like getNormalized or
     * getHighlighted will exclude any parameters to exclude. When
     * disabled, it will act as usual.
     *
     * This allows adding parameters to exclude, but still have
     * access to the original URLs.
     *
     * @param bool $enabled
     * @return URLInfo
     * @see URLInfo::isParamExclusionEnabled()
     * @see URLInfo::setHighlightExcluded()
     */
    public function setParamExclusion(bool $enabled=true) : URLInfo
    {
        $this->paramExclusion = $enabled;
        return $this;
    }

    /**
     * Whether the parameter exclusion mode is enabled:
     * In this case, if any parameters have been added to the
     * exclusion list, all relevant methods will exclude these.
     *
     * @return bool
     */
    public function isParamExclusionEnabled() : bool
    {
        return $this->paramExclusion;
    }

    /**
     * Checks whether the link contains any parameters that
     * are on the list of excluded parameters.
     *
     * @return bool
     */
    public function containsExcludedParams() : bool
    {
        if(empty($this->excludedParams)) {
            return false;
        }

        $excluded = array_intersect_key($this->info['params'], $this->excludedParams);

        return !empty($excluded);
    }

    /**
     * Whether the URL has a parameter with the specified name,
     * after applying the parameter exclusion (if enabled).
     *
     * @param string $name
     * @return bool
     */
    public function hasParam(string $name) : bool
    {
        // Exact key lookup: a loose in_array() on the parameter names
        // would match unrelated names against integer keys like `0`.
        return array_key_exists($name, $this->getParams());
    }

    /**
     * Sets one of the known info keys via the array syntax.
     * Any other offsets are silently ignored.
     *
     * @param mixed $offset
     * @param mixed $value
     * @see URLInfo::$infoKeys
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        if(in_array($offset, $this->infoKeys, true)) {
            $this->info[$offset] = $value;
        }
    }

    /**
     * @param mixed $offset
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->info[$offset]);
    }

    /**
     * @param mixed $offset
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        unset($this->info[$offset]);
    }

    /**
     * Retrieves one of the known info keys via the array syntax.
     *
     * @param mixed $offset
     * @return string|int The port as an integer (-1 if none), any other
     *                    known key as a string, or an empty string for
     *                    unknown offsets.
     * @see URLInfo::$infoKeys
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        if($offset === 'port') {
            return $this->getPort();
        }

        if(in_array($offset, $this->infoKeys, true)) {
            return $this->getInfoKey($offset);
        }

        return '';
    }

    /**
     * Retrieves the highlighting CSS from the highlighter class.
     *
     * @return string
     * @see URLInfo::getHighlighted()
     */
    public static function getHighlightCSS() : string
    {
        return URLInfo_Highlighter::getHighlightCSS();
    }

    /**
     * Retrieves the excluded parameters, as name => reason pairs.
     *
     * @return array
     * @see URLInfo::excludeParam()
     */
    public function getExcludedParams() : array
    {
        return $this->excludedParams;
    }

    /**
     * @return bool
     * @see URLInfo::setHighlightExcluded()
     */
    public function isHighlightExcludeEnabled() : bool
    {
        return $this->highlightExcluded;
    }

    /**
     * Checks if the URL exists, i.e. can be connected to. Will return
     * true if the returned HTTP status code is `200` or `302`.
     *
     * NOTE: If the target URL requires HTTP authentication, the username
     * and password should be integrated into the URL.
     *
     * @param bool $verifySSL Set to false to turn off the SSL host and peer verification.
     * @return bool
     * @throws BaseException If the cURL handle cannot be created.
     *
     * @see URLInfo::ERROR_CURL_INIT_FAILED
     */
    public function tryConnect(bool $verifySSL=true) : bool
    {
        // NOTE(review): presumably ensures the cURL extension is available - defined elsewhere.
        requireCURL();

        $ch = curl_init();

        // Since PHP 8.0, curl_init() returns a CurlHandle object instead
        // of a resource: false is the only failure indicator that works
        // in all PHP versions.
        if($ch === false)
        {
            throw new BaseException(
                'Could not initialize a new cURL instance.',
                'Calling curl_init returned false. Additional information is not available.',
                self::ERROR_CURL_INIT_FAILED
            );
        }

        curl_setopt($ch, CURLOPT_URL, $this->getNormalized());
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        if(!$verifySSL)
        {
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        }

        if($this->hasUsername())
        {
            curl_setopt($ch, CURLOPT_USERNAME, $this->getUsername());
            curl_setopt($ch, CURLOPT_PASSWORD, $this->getPassword());
        }

        // The response body is not needed, only the status code.
        curl_exec($ch);

        $http_code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);

        curl_close($ch);

        return ($http_code === 200) || ($http_code === 302);
    }
}
725