Passed
Push — master ( a594cd...3c388a )
by Sebastian
03:13
created

ConvertHelper_URLFinder::getDefaultOptions()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 4
c 0
b 0
f 0
dl 0
loc 6
rs 10
cc 1
nc 1
nop 0
1
<?php
2
/**
3
 * File containing the {@see AppUtils\ConvertHelper_URLFinder} class.
4
 *
5
 * @package Application Utils
6
 * @subpackage ConvertHelper
7
 * @see AppUtils\ConvertHelper_URLFinder
8
 */
9
10
declare(strict_types=1);
11
12
namespace AppUtils;
13
14
/**
 * Can find any URLs in a string, be it plain text or HTML, XML.
 *
 * The subject string passed to the constructor is never modified:
 * URL detection works on a prepared copy of it, so the results of
 * getURLs() and getEmails() do not depend on the order or the number
 * of calls.
 *
 * @package Application Utils
 * @subpackage ConvertHelper
 * @author Sebastian Mordziol <[email protected]>
 *
 * @see ConvertHelper::createURLFinder()
 */
class ConvertHelper_URLFinder implements Interface_Optionable
{
    use Traits_Optionable;

   /**
    * Regular expression used to detect URL candidates.
    *
    * NOTE(review): the pattern has no "u" modifier, so the multi-byte
    * quote characters in the last character class are matched byte-wise.
    *
    * @see https://gist.github.com/gruber/249502
    */
    const REGEX_URL = '#(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))#i';

   /**
    * The original string to search in, as passed to the constructor.
    *
    * @var string
    */
    protected $subject;

   /**
    * Schemes that get a newline inserted before them when the
    * subject is prepared for the URL search.
    *
    * @var string[]
    */
    protected $schemes = array(
        'http',
        'https',
        'ftp',
        'ftps',
        'mailto',
        'svn',
        'ssl',
        'tel',
    );

   /**
    * @param string $subject The text (plain text, HTML, XML...) to search in.
    */
    public function __construct(string $subject)
    {
        $this->subject = $subject;
    }

   /**
    * Default values of the options used by this class.
    *
    * @return array
    */
    public function getDefaultOptions() : array
    {
        return array(
            'include-emails' => false,
            'omit-mailto' => false,
            'sorting' => false
        );
    }

   /**
    * Whether to enable sorting the URLs alphabetically (disabled by default).
    *
    * @param bool $enabled
    * @return ConvertHelper_URLFinder
    */
    public function enableSorting(bool $enabled=true) : ConvertHelper_URLFinder
    {
        $this->setOption('sorting', $enabled);
        return $this;
    }

   /**
    * Whether to include email addresses in the search.
    * This is only relevant when using the getURLs()
    * method.
    *
    * @param bool $include
    * @return ConvertHelper_URLFinder
    */
    public function includeEmails(bool $include=true) : ConvertHelper_URLFinder
    {
        $this->setOption('include-emails', $include);
        return $this;
    }

   /**
    * Whether to omit the mailto: that is automatically added to all email addresses.
    *
    * @param bool $omit
    * @return ConvertHelper_URLFinder
    */
    public function omitMailto(bool $omit=true) : ConvertHelper_URLFinder
    {
        $this->setOption('omit-mailto', $omit);
        return $this;
    }

   /**
    * Builds a copy of the subject string with a newline added before all
    * URL schemes, to make it possible to parse even lists of links separated
    * by commas or the like (http://domain.com,http://domain2.com).
    *
    * The stored subject is left untouched on purpose: modifying it in place
    * made it grow with every getURLs() call, and changed the input seen by
    * getEmails() depending on whether getURLs() had been called before.
    *
    * @return string
    */
    private function getPreparedSubject() : string
    {
        $search = array();
        $replace = array();

        foreach($this->schemes as $scheme)
        {
            $search[] = $scheme.':';
            $replace[] = PHP_EOL.$scheme.':';
        }

        return str_replace($search, $replace, $this->subject);
    }

   /**
    * Sorts the specified strings using a case insensitive
    * natural order comparison, and returns the sorted list.
    *
    * @param string[] $items
    * @return string[]
    */
    private function sortResult(array $items) : array
    {
        usort($items, static function(string $a, string $b) : int {
            return strnatcasecmp($a, $b);
        });

        return $items;
    }

   /**
    * Fetches all URLs that can be found in the subject string.
    *
    * Only matches containing "://" are kept, and duplicates are
    * removed. Email addresses are appended if the "include-emails"
    * option is enabled, and the whole list is sorted if the
    * "sorting" option is enabled.
    *
    * @return string[]
    */
    public function getURLs() : array
    {
        $matches = array();
        preg_match_all(self::REGEX_URL, $this->getPreparedSubject(), $matches, PREG_PATTERN_ORDER);

        $result = array();
        $seen = array();

        // If the regex engine fails (e.g. backtrack limit reached), the
        // full match index may be missing: treat this as "nothing found".
        foreach(($matches[0] ?? array()) as $match)
        {
            if(strpos($match, '://') === false || isset($seen[$match]))
            {
                continue;
            }

            $seen[$match] = true;
            $result[] = $match;
        }

        if($this->getBoolOption('include-emails'))
        {
            $result = array_merge($result, $this->getEmails());
        }

        if($this->getBoolOption('sorting'))
        {
            $result = $this->sortResult($result);
        }

        return $result;
    }

   /**
    * Retrieves all email addresses from the subject string.
    *
    * Each address is prefixed with "mailto:", unless the
    * "omit-mailto" option is enabled. The list is sorted if
    * the "sorting" option is enabled.
    *
    * @return string[]
    *
    * @see omitMailto()
    */
    public function getEmails() : array
    {
        $matches = array();
        preg_match_all(RegexHelper::REGEX_EMAIL, $this->subject, $matches, PREG_PATTERN_ORDER);

        $result = array();
        $prefix = $this->getEmailPrefix();

        // Same safeguard as in getURLs() for a failed regex run.
        foreach(($matches[0] ?? array()) as $email)
        {
            $result[] = $prefix.$email;
        }

        if($this->getBoolOption('sorting'))
        {
            $result = $this->sortResult($result);
        }

        return $result;
    }

   /**
    * Retrieves the prefix to prepend to email addresses: an empty
    * string if the "omit-mailto" option is enabled, "mailto:" otherwise.
    *
    * @return string
    */
    private function getEmailPrefix() : string
    {
        if($this->getBoolOption('omit-mailto'))
        {
            return '';
        }

        return 'mailto:';
    }

   /**
    * Retrieves all URLs as URLInfo instances.
    *
    * Every URL returned by getURLs() is passed through parseURL(),
    * which is defined outside of this file.
    *
    * @return URLInfo[]
    */
    public function getInfos()
    {
        $result = array();

        foreach($this->getURLs() as $url)
        {
            $result[] = parseURL($url);
        }

        return $result;
    }
}
216