This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis. Consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * TLDExtract: Library for extraction of domain parts e.g. TLD. Domain parser that uses Public Suffix List. |
||
4 | * |
||
5 | * @link https://github.com/layershifter/TLDExtract |
||
6 | * |
||
7 | * @copyright Copyright (c) 2016, Alexander Fedyashov |
||
8 | * @license https://raw.githubusercontent.com/layershifter/TLDExtract/master/LICENSE Apache 2.0 License |
||
9 | */ |
||
10 | |||
11 | namespace LayerShifter\TLDExtract; |
||
12 | |||
13 | use LayerShifter\TLDDatabase\Store; |
||
14 | use LayerShifter\TLDExtract\Exceptions\RuntimeException; |
||
15 | use LayerShifter\TLDSupport\Helpers\Arr; |
||
16 | use LayerShifter\TLDSupport\Helpers\IP; |
||
17 | use LayerShifter\TLDSupport\Helpers\Str; |
||
18 | |||
19 | /** |
||
20 | * Extract class accurately extracts subdomain, domain and TLD components from URLs. |
||
21 | * |
||
22 | * @see Result for more information on the returned data structure. |
||
23 | */ |
||
24 | class Extract |
||
25 | { |
||
26 | |||
/**
 * @const int Extraction flag: suffixes that the store reports as Store::TYPE_ICCAN (ICANN suffixes)
 *            are recognised. The "ICCAN" spelling of the name is kept for backward compatibility.
 */
const MODE_ALLOW_ICCAN = 2;

/**
 * @const int Extraction flag: suffixes that the store reports as Store::TYPE_PRIVATE are recognised.
 */
const MODE_ALLOW_PRIVATE = 4;

/**
 * @const int Extraction flag: when no known suffix matches, the last label of the hostname
 *            is used as the suffix (custom / not existing suffixes).
 */
const MODE_ALLOW_NOT_EXISTING_SUFFIXES = 8;

/**
 * @const string Pattern that matches an optional RFC 3986 scheme followed by "//" at the start of a URL.
 *
 * @see https://tools.ietf.org/html/rfc3986#section-3.1
 */
const SCHEMA_PATTERN = '#^([a-zA-Z][a-zA-Z0-9+\-.]*:)?//#';

/**
 * @var int Bit mask built from the MODE_* flags that controls suffix extraction.
 */
private $extractionMode;

/**
 * @var string Name of the class that is instantiated to hold parsing results.
 */
private $resultClassName;

/**
 * @var Store Suffix database (TLDDatabase\Store) queried for suffix existence and type.
 */
private $suffixStore;
||
58 | |||
/**
 * Creates an extractor backed by a Public Suffix List store.
 *
 * @param null|string $databaseFile    Optional, passed as-is to the Store constructor
 * @param null|string $resultClassName Optional, name of a class implementing ResultInterface that will
 *                                     hold parsing results; Result is used when omitted
 * @param null|int    $extractionMode  Optional, MODE_* flag or a combination of them; see setExtractionMode()
 *
 * @throws RuntimeException If the result class is not defined, does not implement ResultInterface,
 *                          or the extraction mode is rejected by setExtractionMode()
 */
public function __construct($databaseFile = null, $resultClassName = null, $extractionMode = null)
{
    $this->suffixStore = new Store($databaseFile);

    // Default result container; replaced below only after the custom class passed validation.
    $className = Result::class;

    if (null !== $resultClassName) {
        if (!class_exists($resultClassName)) {
            throw new RuntimeException(sprintf('Class "%s" is not defined', $resultClassName));
        }

        $implementedInterfaces = class_implements($resultClassName);

        if (!in_array(ResultInterface::class, $implementedInterfaces, true)) {
            throw new RuntimeException(sprintf('Class "%s" not implements ResultInterface', $resultClassName));
        }

        $className = $resultClassName;
    }

    $this->resultClassName = $className;
    $this->setExtractionMode($extractionMode);
}
||
89 | |||
/**
 * Sets the extraction mode, the bit mask that controls which suffixes are recognised.
 *
 * @param null|int $extractionMode One of the MODE_* constants or a bitwise combination of them;
 *                                 null enables all three modes
 *
 * @throws RuntimeException If the value is neither null nor an integer, or is not a valid
 *                          combination of the MODE_* constants
 */
public function setExtractionMode($extractionMode = null)
{
    $iccan = static::MODE_ALLOW_ICCAN;
    $private = static::MODE_ALLOW_PRIVATE;
    $notExisting = static::MODE_ALLOW_NOT_EXISTING_SUFFIXES;

    // No explicit mode: everything is allowed.
    if (null === $extractionMode) {
        $this->extractionMode = $iccan | $private | $notExisting;

        return;
    }

    if (!is_int($extractionMode)) {
        throw new RuntimeException('Invalid argument type, extractionMode must be integer');
    }

    // Every non-empty combination of the three flags.
    $validModes = [
        $iccan,
        $private,
        $notExisting,
        $iccan | $private,
        $iccan | $notExisting,
        $private | $notExisting,
        $iccan | $private | $notExisting,
    ];

    if (!in_array($extractionMode, $validModes, true)) {
        throw new RuntimeException(
            'Invalid argument type, extractionMode must be one of defined constants of their combination'
        );
    }

    $this->extractionMode = $extractionMode;
}
||
128 | |||
/**
 * Splits a URL into its subdomain, host and suffix components.
 *
 * @param string $url URL to be parsed
 *
 * @return ResultInterface Instance of the configured result class
 */
public function parse($url)
{
    $resultClass = $this->resultClassName;
    $hostname = $this->extractHostname($url);

    // An IP address has neither subdomain nor suffix, so it is stored as the host on its own.
    if (IP::isValid($hostname)) {
        return new $resultClass(null, $hostname, null);
    }

    $parts = $this->extractParts($hostname);

    return new $resultClass($parts[0], $parts[1], $parts[2]);
}
||
150 | |||
/**
 * Reduces a URL to its hostname or IP address.
 *
 * @param string $url URL for extraction
 *
 * @return null|string Hostname or IP address; null when nothing is left after stripping
 */
private function extractHostname($url)
{
    $normalized = trim(Str::lower($url));

    // Drops the leading scheme, i.e. "https://github.com/layershifter" becomes "github.com/layershifter".
    $normalized = preg_replace(static::SCHEMA_PATTERN, '', $normalized);

    // Drops path and query, i.e. "github.com/layershifter" becomes "github.com". The query part is
    // separated with a slash first, so that "github.com?layershifter" is cut correctly as well.
    $authority = Arr::first(explode('/', $this->fixQueryPart($normalized), 2));

    // Drops user information, i.e. "user@github.com" becomes "github.com".
    $authority = Arr::last(explode('@', $authority));

    // IPv6 literals such as "[3ffe:2a00:100:7031::1]" are returned without brackets and port.
    //
    // @see http://www.ietf.org/rfc/rfc2732.txt
    $closingBracket = Str::strrpos($authority, ']');

    if (false !== $closingBracket && Str::startsWith($authority, '[')) {
        return Str::substr($authority, 1, $closingBracket - 1);
    }

    // Plain hostname or IPv4 address: everything after the first colon is the port.
    $host = Arr::first(explode(':', $authority));

    if ('' === $host) {
        return null;
    }

    return $host;
}
||
193 | |||
/**
 * Splits a hostname into subdomain, host and suffix. Based on the algorithm described in
 * https://publicsuffix.org/list/.
 *
 * @param string $hostname Hostname for extraction
 *
 * @return array|string[] Array of three items: subdomain, host and suffix; missing parts are null
 */
public function extractParts($hostname)
{
    $suffix = $this->extractSuffix($hostname);

    // The whole hostname is a suffix: it is reported as the host without subdomain and suffix.
    if ($hostname === $suffix) {
        return [null, $hostname, null];
    }

    // Cuts the suffix together with the dot in front of it.
    $remainder = $hostname;

    if (null !== $suffix) {
        $remainder = Str::substr($hostname, 0, -Str::length($suffix) - 1);
    }

    $separator = Str::strrpos($remainder, '.');

    // The last label is the host, everything before it is the subdomain.
    if (false !== $separator) {
        return [
            Str::substr($remainder, 0, $separator),
            Str::substr($remainder, $separator + 1),
            $suffix
        ];
    }

    return [null, $remainder, $suffix];
}
||
229 | |||
/**
 * Determines the suffix of a hostname, honouring the configured extraction mode.
 *
 * Punycoded hostnames ("xn--") are converted to their Unicode form for the lookup and the
 * found suffix is converted back to punycode, so the result always matches the input form.
 *
 * @param string $hostname Hostname for extraction
 *
 * @return null|string Suffix, or null when the hostname is invalid, no suffix is found
 *                     (and not existing suffixes are not allowed) or IDN conversion of the suffix fails
 */
private function extractSuffix($hostname)
{
    // If hostname has leading dot, it's invalid.
    // If hostname is a single label domain, it's invalid.

    if (Str::startsWith($hostname, '.') || Str::strpos($hostname, '.') === false) {
        return null;
    }

    // If domain is in punycode, it will be converted to IDN. The variant is passed explicitly:
    // relying on the default one is deprecated since PHP 7.2.

    $isPunycoded = Str::strpos($hostname, 'xn--') !== false;

    if ($isPunycoded) {
        $decoded = idn_to_utf8($hostname, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);

        if (false === $decoded) {
            // Malformed punycode: the lookup continues with the hostname as it was received,
            // so that no conversion back is required (and "false" is never used as hostname).
            $isPunycoded = false;
        } else {
            $hostname = $decoded;
        }
    }

    $suffix = $this->parseSuffix($hostname);

    if (null === $suffix) {
        if (!($this->extractionMode & static::MODE_ALLOW_NOT_EXISTING_SUFFIXES)) {
            return null;
        }

        // Unknown suffix is allowed: the last label of hostname is taken.
        $suffix = Str::substr($hostname, Str::strrpos($hostname, '.') + 1);
    }

    if (!$isPunycoded) {
        return $suffix;
    }

    // If domain is punycoded, suffix will be converted to punycode.

    $encoded = idn_to_ascii($suffix, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);

    return false === $encoded ? null : $encoded;
}
||
268 | |||
/**
 * Looks up the longest trailing part of a hostname that is known to the suffix database.
 *
 * Candidates are checked from the full hostname towards its last label. For every candidate
 * three kinds of entries are tried in order: exception ("!candidate"), exact ("candidate") and
 * wildcard ("*." followed by the candidate without its first label).
 *
 * @param string $hostname Hostname for extraction
 *
 * @return null|string Found suffix, null when no entry matches
 */
private function parseSuffix($hostname)
{
    $labels = explode('.', $hostname);
    $total = count($labels);

    for ($offset = 0; $offset < $total; $offset++) {
        $candidate = implode('.', array_slice($labels, $offset));
        $parent = implode('.', array_slice($labels, $offset + 1));

        // An exception entry means that the suffix is the candidate without its first label.
        if ($this->suffixExists('!' . $candidate)) {
            return $parent;
        }

        // Exact entry first, then a wildcard entry registered for the parent.
        if ($this->suffixExists($candidate) || $this->suffixExists('*.' . $parent)) {
            return $candidate;
        }
    }

    return null;
}
||
308 | |||
/**
 * Checks that an entry exists in the suffix database and that its type is enabled
 * by the current extraction mode.
 *
 * @param string $entry Entry for check in Public Suffix List database
 *
 * @return bool True when the entry exists and its type (ICANN or private) is allowed
 */
private function suffixExists($entry)
{
    if (!$this->suffixStore->isExists($entry)) {
        return false;
    }

    $type = $this->suffixStore->getType($entry);
    $mode = $this->extractionMode;

    $allowedAsIccan = Store::TYPE_ICCAN === $type && 0 !== ($mode & static::MODE_ALLOW_ICCAN);
    $allowedAsPrivate = Store::TYPE_PRIVATE === $type && 0 !== ($mode & static::MODE_ALLOW_PRIVATE);

    return $allowedAsIccan || $allowedAsPrivate;
}
||
330 | |||
/**
 * Inserts a slash in front of the query part, i.e. turns "github.com?layershifter"
 * into "github.com/?layershifter". URLs without "?" are returned unchanged.
 *
 * @see https://github.com/layershifter/TLDExtract/issues/5
 *
 * @param string $url
 *
 * @return string
 */
private function fixQueryPart($url)
{
    $queryStart = Str::strpos($url, '?');

    // Static analysis reports that the position may be boolean when passed to Str::substr().
    // It cannot be here: the "false" (not found) case never reaches the calls below.
    if (false !== $queryStart) {
        $url = Str::substr($url, 0, $queryStart) . '/' . Str::substr($url, $queryStart);
    }

    return $url;
}
||
350 | } |
||
351 |
If a method or function can return values of several different types, and you cannot be sure that only one of them can occur in this context, we recommend adding an additional type check:
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.