1 | <?php |
||
26 | class Parser |
||
27 | { |
||
28 | /** |
||
29 | * @var string RFC 3986 compliant scheme regex pattern |
||
30 | * |
||
31 | * @see https://tools.ietf.org/html/rfc3986#section-3.1 |
||
32 | */ |
||
33 | const SCHEME_PATTERN = '#^([a-zA-Z][a-zA-Z0-9+\-.]*)://#'; |
||
34 | |||
35 | /** |
||
36 | * @var string IPv4 address (dotted-quad) regex pattern |
||
37 | */ |
||
38 | const IP_ADDRESS_PATTERN = '/^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/'; |
||
39 | |||
40 | /** |
||
41 | * @var PublicSuffixList Public Suffix List |
||
42 | */ |
||
43 | protected $publicSuffixList; |
||
44 | |||
45 | /** |
||
46 | * @var bool Whether or not a host part has been normalized |
||
47 | */ |
||
48 | protected $isNormalized = false; |
||
49 | |||
50 | /** |
||
51 | * @var PunycodeWrapper |
||
52 | */ |
||
53 | private $punycodeWrapper; |
||
54 | |||
55 | /** |
||
56 | * Public constructor. |
||
57 | * |
||
58 | * @codeCoverageIgnore |
||
59 | * |
||
60 | * @param PublicSuffixList $publicSuffixList Instance of PublicSuffixList |
||
61 | */ |
||
62 | public function __construct(PublicSuffixList $publicSuffixList) |
||
67 | |||
68 | /** |
||
69 | * Parses url. |
||
70 | * |
||
71 | * @param string $url Url to parse |
||
72 | * |
||
73 | * @return Url Object representation of url |
||
74 | */ |
||
75 | 233 | public function parseUrl($url): Url |
|
121 | |||
122 | /** |
||
123 | * Parses host part of url. |
||
124 | * |
||
125 | * @param string $host Host part of url |
||
126 | * |
||
127 | * @return Host Object representation of host portion of url |
||
128 | */ |
||
129 | 54 | public function parseHost(string $host): Host |
|
140 | |||
141 | /** |
||
142 | * Get the raw public suffix based on the cached public suffix list file. |
||
143 | * Return false if the provided suffix is not included in the PSL. |
||
144 | * |
||
145 | * @param string $host The host to process |
||
146 | * |
||
147 | * @return string|false The suffix or false if suffix not included in the PSL |
||
148 | */ |
||
149 | 11 | protected function getRawPublicSuffix(string $host) |
|
197 | |||
198 | /** |
||
199 | * Returns the public suffix portion of provided host. |
||
200 | * |
||
201 | * @param string $host host |
||
202 | * |
||
203 | * @return string|null public suffix or null if host does not contain a public suffix |
||
204 | */ |
||
205 | 56 | public function getPublicSuffix(string $host) |
|
233 | |||
234 | /** |
||
235 | * Is suffix valid? |
||
236 | * |
||
237 | * Validity determined by whether or not the suffix is included in the PSL. |
||
238 | * |
||
239 | * @param string $host Host part |
||
240 | * |
||
241 | * @return bool True if suffix is valid, false otherwise |
||
242 | */ |
||
243 | 2 | public function isSuffixValid(string $host): bool |
|
247 | |||
248 | /** |
||
249 | * Returns registrable domain portion of provided host. |
||
250 | * |
||
251 | * Per the test cases provided by Mozilla |
||
252 | * (http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt?raw=1), |
||
253 | * this method should return null if the domain provided is a public suffix. |
||
254 | * |
||
255 | * @param string|null $host host |
||
256 | * |
||
257 | * @return string|null registrable domain |
||
258 | */ |
||
259 | 55 | public function getRegistrableDomain($host) |
|
277 | |||
278 | /** |
||
279 | * Returns the subdomain portion of provided host. |
||
280 | * |
||
281 | * @param string $host host |
||
282 | * |
||
283 | * @return string|null subdomain |
||
284 | */ |
||
285 | 55 | public function getSubdomain(string $host) |
|
304 | |||
305 | /** |
||
306 | * If a URL is not punycoded, then it may be an IDNA URL, so it must be |
||
307 | * converted to ASCII. Performs conversion and sets flag. |
||
308 | * |
||
309 | * @param string $part Host part |
||
310 | * |
||
311 | * @return string Host part, transformed if not punycoded |
||
312 | */ |
||
313 | 11 | protected function normalize(string $part): string |
|
324 | |||
325 | /** |
||
326 | * Converts any normalized part back to IDNA. Performs conversion and |
||
327 | * resets flag. |
||
328 | * |
||
329 | * @param string $part Host part |
||
330 | * |
||
331 | * @return string Denormalized host part |
||
332 | */ |
||
333 | 11 | protected function denormalize(string $part): string |
|
342 | |||
343 | /** |
||
344 | * Tests host for presence of '.'. |
||
345 | * |
||
346 | * Related to #22 |
||
347 | * |
||
348 | * @param string|null $host Host part of url |
||
349 | * |
||
350 | * @return bool True if multi-label domain, false otherwise |
||
351 | */ |
||
352 | 11 | protected function isMultiLabelDomain($host): bool |
|
360 | |||
361 | /** |
||
362 | * Tests host to determine if it is an IPv4 address. |
||
363 | * |
||
364 | * Related to #43 |
||
365 | * |
||
366 | * @param string $host Host part of url |
||
367 | * |
||
368 | * @return bool True if host is an IPv4 address, false otherwise |
||
369 | */ |
||
370 | 11 | protected function isIpv4Address(string $host): bool |
|
374 | } |
||
375 |
In PHP, under loose comparison (like `==`, `!=`, or `switch` conditions), values of different types might be equal. For `string` values, the empty string `''` is a special case; in particular, the following results might be unexpected: