Passed
Push — master ( b28623...b691b7 )
by Sebastian
04:23
created

ConvertHelper_URLFinder::getURLs()   A

Complexity

Conditions 6
Paths 4

Size

Total Lines 28
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 12
c 1
b 0
f 0
dl 0
loc 28
rs 9.2222
cc 6
nc 4
nop 0
1
<?php
2
/**
3
 * File containing the {@see AppUtils\ConvertHelper_URLFinder} class.
4
 *
5
 * @package Application Utils
6
 * @subpackage ConvertHelper
7
 * @see AppUtils\ConvertHelper_URLFinder
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
/**
15
 * Finds URLs in a string, whether it is plain text, HTML or XML.
16
 *
17
 * @package Application Utils
18
 * @subpackage ConvertHelper
19
 * @author Sebastian Mordziol <[email protected]>
20
 * 
21
 * @see ConvertHelper::createURLFinder()
22
 */
23
class ConvertHelper_URLFinder
{
   /**
    * The text to search. Once prepared, it contains an additional
    * newline in front of every known URL scheme.
    *
    * @var string
    */
    protected $subject;

   /**
    * Whether the subject has already been prepared, so that repeated
    * calls to {@see getURLs()} or {@see getInfos()} do not keep
    * inserting separators into it.
    *
    * @var boolean
    */
    protected $prepared = false;

   /**
    * Whether the detected URLs are sorted before being returned.
    *
    * @var boolean
    */
    protected $sorting = false;

   /**
    * Scheme names in front of which a newline is inserted while
    * preparing the subject.
    *
    * @var string[]
    */
    protected $schemes = array(
        'http',
        'https',
        'ftp',
        'ftps',
        'mailto',
        'svn',
        'ssl',
        'tel',
    );

   /**
    * @param string $subject The text in which to look for URLs.
    */
    public function __construct(string $subject)
    {
        $this->subject = $subject;
    }

   /**
    * Whether to enable sorting the URLs alphabetically (disabled by default).
    * The sorting is a case insensitive natural order comparison.
    *
    * @param bool $enabled
    * @return ConvertHelper_URLFinder
    */
    public function enableSorting(bool $enabled=true) : ConvertHelper_URLFinder
    {
        $this->sorting = $enabled;

        return $this;
    }

   /**
    * Prepares the subject string by adding a newline before all URL schemes,
    * to make it possible to parse even lists of links separated by commas or
    * the like (http://domain.com,http://domain2.com).
    *
    * The schemes are matched case insensitively, consistent with the URL
    * detection itself, and their original casing is left untouched. The
    * preparation runs only once per instance.
    */
    protected function prepareSubject() : void
    {
        if($this->prepared)
        {
            return;
        }

        $this->prepared = true;

        $alternatives = array();

        foreach($this->schemes as $scheme)
        {
            $alternatives[] = preg_quote($scheme, '#');
        }

        // An empty alternation would match every single colon.
        if(empty($alternatives))
        {
            return;
        }

        $prepared = preg_replace(
            '#('.implode('|', $alternatives).'):#i',
            PHP_EOL.'${1}:',
            $this->subject
        );

        // preg_replace returns null on a PCRE error: keep the
        // subject unchanged in that case.
        if(is_string($prepared))
        {
            $this->subject = $prepared;
        }
    }

   /**
    * Fetches all URLs that can be found in the subject string.
    *
    * Only matches that contain "://" are returned, each of them once,
    * in the order of their first appearance unless sorting is enabled.
    *
    * @return string[]
    *
    * @see https://gist.github.com/gruber/249502
    */
    public function getURLs() : array
    {
        $this->prepareSubject();

        $matches = array();
        preg_match_all('#(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))#i', $this->subject, $matches, PREG_PATTERN_ORDER);

        $urls = array();

        // The full matches are stored in the first set; it can be
        // missing if the matching failed.
        foreach($matches[0] ?? array() as $match)
        {
            if(strpos($match, '://') !== false)
            {
                $urls[] = $match;
            }
        }

        // array_unique keeps the first occurrence of each URL, but
        // leaves gaps in the keys: reindex to get a plain list.
        $result = array_values(array_unique($urls));

        if($this->sorting)
        {
            usort($result, static function(string $a, string $b) : int {
                return strnatcasecmp($a, $b);
            });
        }

        return $result;
    }

   /**
    * Retrieves all URLs as URLInfo instances, by passing each
    * URL returned by {@see getURLs()} to parseURL().
    *
    * @return URLInfo[]
    */
    public function getInfos()
    {
        $result = array();

        foreach($this->getURLs() as $url)
        {
            $result[] = parseURL($url);
        }

        return $result;
    }
}
137