This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | /** |
||
| 6 | * PHP Domain Parser: Public Suffix List based URL parsing. |
||
| 7 | * |
||
| 8 | * @link http://github.com/jeremykendall/php-domain-parser for the canonical source repository |
||
| 9 | * |
||
| 10 | * @copyright Copyright (c) 2014 Jeremy Kendall (http://about.me/jeremykendall) |
||
| 11 | * @license http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License |
||
| 12 | */ |
||
| 13 | |||
| 14 | namespace Pdp; |
||
| 15 | |||
| 16 | use Pdp\Exception\SeriouslyMalformedUrlException; |
||
| 17 | use Pdp\Uri\Url; |
||
| 18 | use Pdp\Uri\Url\Host; |
||
| 19 | use voku\helper\UTF8; |
||
| 20 | |||
| 21 | /** |
||
| 22 | * Parser. |
||
| 23 | * |
||
| 24 | * This class is responsible for Public Suffix List based url parsing |
||
| 25 | */ |
||
| 26 | class Parser |
||
| 27 | { |
||
| 28 | /** |
||
| 29 | * @var string RFC 3986 compliant scheme regex pattern |
||
| 30 | * |
||
| 31 | * @see https://tools.ietf.org/html/rfc3986#section-3.1 |
||
| 32 | */ |
||
| 33 | const SCHEME_PATTERN = '#^([a-zA-Z][a-zA-Z0-9+\-.]*)://#'; |
||
| 34 | |||
| 35 | /** |
||
| 36 | * @var string IP address regex pattern |
||
| 37 | */ |
||
| 38 | const IP_ADDRESS_PATTERN = '/^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/'; |
||
| 39 | |||
| 40 | /** |
||
| 41 | * @var PublicSuffixList Public Suffix List |
||
| 42 | */ |
||
| 43 | protected $publicSuffixList; |
||
| 44 | |||
| 45 | /** |
||
| 46 | * @var bool Whether or not a host part has been normalized |
||
| 47 | */ |
||
| 48 | protected $isNormalized = false; |
||
| 49 | |||
| 50 | /** |
||
| 51 | * @var PunycodeWrapper |
||
| 52 | */ |
||
| 53 | private $punycodeWrapper; |
||
| 54 | |||
/**
 * Creates a parser bound to the given Public Suffix List.
 *
 * A fresh PunycodeWrapper is created alongside it; the parser uses that
 * wrapper when normalizing and denormalizing host parts.
 *
 * @codeCoverageIgnore
 *
 * @param PublicSuffixList $publicSuffixList Instance of PublicSuffixList
 */
public function __construct(PublicSuffixList $publicSuffixList)
{
    $this->punycodeWrapper = new PunycodeWrapper();
    $this->publicSuffixList = $publicSuffixList;
}
||
| 67 | |||
/**
 * Parses a url into its component parts.
 *
 * Urls that do not start with an RFC 3986 scheme are temporarily prefixed
 * with a placeholder scheme before being handed to pdp_parse_url(); the
 * placeholder is removed again before the result object is built.
 *
 * @param string $url Url to parse
 *
 * @throws SeriouslyMalformedUrlException if the url cannot be parsed or no host is found
 *
 * @return Url Object representation of url
 */
public function parseUrl($url): Url
{
    $rawUrl = $url;
    $elem = [
        'scheme'   => null,
        'user'     => null,
        'pass'     => null,
        'host'     => null,
        'port'     => null,
        'path'     => null,
        'query'    => null,
        'fragment' => null,
    ];

    // Track whether the placeholder scheme was added by this method, so that
    // a url which genuinely uses a "php-hack" scheme keeps its scheme.
    $placeholderSchemeAdded = false;

    if (\preg_match(self::SCHEME_PATTERN, $url) === 0) {
        // Wacky scheme required to overcome parse_url behavior in PHP
        // See https://github.com/jeremykendall/php-domain-parser/issues/49
        $url = 'php-hack://' . \preg_replace('#^//#', '', $url, 1);
        $placeholderSchemeAdded = true;
    }

    $parts = pdp_parse_url($url);

    if ($parts === false || !isset($parts['host'])) {
        throw new SeriouslyMalformedUrlException($rawUrl);
    }

    // Only strip the scheme when it is the placeholder added above.
    if ($placeholderSchemeAdded && ($parts['scheme'] ?? null) === 'php-hack') {
        $parts['scheme'] = null;
    }

    // Array union: parsed parts win, missing keys fall back to the null defaults.
    /** @noinspection AdditionOperationOnArraysInspection */
    $elem = (array)$parts + $elem;

    $host = $this->parseHost($parts['host']);

    return new Url(
        $elem['scheme'],
        $elem['user'],
        $elem['pass'],
        $host,
        $elem['port'],
        $elem['path'],
        $elem['query'],
        $elem['fragment']
    );
}
||
| 121 | |||
/**
 * Builds a Host object from the host part of a url.
 *
 * The host is lower-cased first; subdomain, registrable domain and public
 * suffix are then derived from the lower-cased value.
 *
 * @param string $host Host part of url
 *
 * @return Host Object representation of host portion of url
 */
public function parseHost(string $host): Host
{
    $lowercasedHost = UTF8::strtolower($host);

    // Resolved in the same order as the Host constructor arguments.
    $subdomain = $this->getSubdomain($lowercasedHost);
    $registrableDomain = $this->getRegistrableDomain($lowercasedHost);
    $publicSuffix = $this->getPublicSuffix($lowercasedHost);

    return new Host($subdomain, $registrableDomain, $publicSuffix, $lowercasedHost);
}
||
| 140 | |||
/**
 * Get the raw public suffix based on the public suffix list.
 * Return false if the provided suffix is not included in the PSL.
 *
 * The host is normalized, then its labels are walked from right to left
 * through the nested list structure. A label is added to the suffix when
 * the current level has a matching key or a '*' (wildcard) key; the walk
 * stops at a label whose entry carries a '!' key or when nothing matches.
 *
 * @param string $host The host to process
 *
 * @return string|false The suffix or false if suffix not included in the PSL
 */
protected function getRawPublicSuffix(string $host)
{
    $host = $this->normalize($host);

    $parts = \array_reverse(\explode('.', $host));
    $publicSuffix = [];
    $publicSuffixList = $this->publicSuffixList;

    foreach ($parts as $part) {
        // isset() is used instead of array_key_exists() because the top
        // level is a PublicSuffixList object: array_key_exists() on objects
        // is deprecated since PHP 7.4 and throws a TypeError on PHP 8.
        // NOTE(review): assumes list entries are never null (isset() treats
        // a null value as missing) — confirm against the list builder.
        $hasPart = isset($publicSuffixList[$part]);

        if ($hasPart && isset($publicSuffixList[$part]['!'])) {
            break;
        }

        if ($hasPart) {
            \array_unshift($publicSuffix, $part);
            $publicSuffixList = $publicSuffixList[$part];
            continue;
        }

        if (isset($publicSuffixList['*'])) {
            \array_unshift($publicSuffix, $part);
            $publicSuffixList = $publicSuffixList['*'];
            continue;
        }

        // Avoids improper parsing when $host's subdomain + public suffix ===
        // a valid public suffix (e.g. host 'us.example.com' and public suffix 'us.com')
        //
        // Added by @goodhabit in https://github.com/jeremykendall/php-domain-parser/pull/15
        // Resolves https://github.com/jeremykendall/php-domain-parser/issues/16
        break;
    }

    // If empty, then the suffix is not included in the PSL and is
    // considered "invalid". This also triggers algorithm rule #2: If no
    // rules match, the prevailing rule is "*".
    if (empty($publicSuffix)) {
        // normalize() may have raised the flag; since denormalize() is not
        // reached on this path, reset it here so that it cannot leak into
        // the next normalize()/denormalize() pair.
        $this->isNormalized = false;

        return false;
    }

    // Empty labels (e.g. from consecutive dots) are dropped from the suffix.
    $suffix = \implode('.', \array_filter($publicSuffix, '\strlen'));

    return $this->denormalize($suffix);
}
||
| 197 | |||
/**
 * Returns the public suffix portion of provided host.
 *
 * Null is returned for hosts that start with a dot, hosts without any dot
 * and IPv4 addresses. When the list lookup finds no suffix, the last label
 * of the host is returned instead.
 *
 * @param string $host host
 *
 * @return string|null public suffix or null if host does not contain a public suffix
 */
public function getPublicSuffix(string $host)
{
    // Guard clauses, evaluated in order:
    //  - leading dot
    //  - single label domain such as "localhost" (fixes #22)
    //  - IPv4 address (fixes #43)
    if (
        \strpos($host, '.') === 0
        || !$this->isMultiLabelDomain($host)
        || $this->isIpv4Address($host)
    ) {
        return null;
    }

    $rawSuffix = $this->getRawPublicSuffix($host);

    if ($rawSuffix !== false) {
        return $rawSuffix;
    }

    // Apply algorithm rule #2: If no rules match, the prevailing rule is "*",
    // i.e. the right-most label is the suffix.
    $labels = \explode('.', $host);

    return \array_pop($labels);
}
||
| 233 | |||
/**
 * Tells whether the suffix of the given host is valid.
 *
 * A suffix counts as valid when the raw public suffix lookup returns
 * something other than false.
 *
 * @param string $host Host part
 *
 * @return bool True if suffix is valid, false otherwise
 */
public function isSuffixValid(string $host): bool
{
    $rawSuffix = $this->getRawPublicSuffix($host);

    return false !== $rawSuffix;
}
||
| 247 | |||
/**
 * Returns registrable domain portion of provided host.
 *
 * Per the test cases provided by Mozilla
 * (http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt?raw=1),
 * this method should return null if the domain provided is a public suffix.
 *
 * The result is the public suffix plus the single host label directly to
 * the left of it.
 *
 * @param string|null $host host
 *
 * @return string|null registrable domain, or null when the host has no dot,
 *                     has no public suffix, or is itself the public suffix
 */
public function getRegistrableDomain($host)
{
    if (!$this->isMultiLabelDomain($host)) {
        return null;
    }

    $publicSuffix = $this->getPublicSuffix($host);

    // Strict comparison: with "==" two numeric-looking strings such as
    // '0.00' and '00' compare equal even though they are different hosts.
    if ($publicSuffix === null || $host === $publicSuffix) {
        return null;
    }

    $publicSuffixParts = \array_reverse(\explode('.', $publicSuffix));
    $hostParts = \array_reverse(\explode('.', $host));
    $suffixLabelCount = \count($publicSuffixParts);

    // Both lists are reversed (right-most label first): keep the suffix
    // labels as returned by getPublicSuffix() and append the next host
    // label, if the host has one at that position.
    $registrableDomainParts = \array_merge(
        $publicSuffixParts,
        \array_slice($hostParts, $suffixLabelCount, 1)
    );

    return \implode('.', \array_reverse($registrableDomainParts));
}
||
| 277 | |||
/**
 * Returns the subdomain portion of provided host.
 *
 * The subdomain is made of the host labels left of the registrable domain.
 *
 * @param string $host host
 *
 * @return string|null subdomain, or null when there is no registrable
 *                     domain or the host equals the registrable domain
 */
public function getSubdomain(string $host)
{
    $registrableDomain = $this->getRegistrableDomain($host);

    if ($registrableDomain === null) {
        return null;
    }

    if ($host === $registrableDomain) {
        return null;
    }

    // Number of labels in the registrable domain (dots + 1).
    $registrableLabelCount = \substr_count($registrableDomain, '.') + 1;

    $labels = \explode('.', $this->normalize($host));

    // Keep the leading labels that are not part of the registrable domain;
    // never a negative length, so a shorter host yields an empty list.
    $keep = \max(0, \count($labels) - $registrableLabelCount);
    $subdomainLabels = \array_slice($labels, 0, $keep);

    return $this->denormalize(\implode('.', $subdomainLabels));
}
||
| 304 | |||
/**
 * If a URL is not punycoded, then it may be an IDNA URL, so it must be
 * converted to ASCII. Performs conversion and records, for the matching
 * denormalize() call, whether a conversion took place.
 *
 * A part is treated as punycoded when it contains "xn--" anywhere.
 *
 * @param string $part Host part
 *
 * @return string Lower-cased host part, transformed if not punycoded
 */
protected function normalize(string $part): string
{
    $punycoded = (\strpos($part, 'xn--') !== false);

    if ($punycoded === false) {
        $part = $this->punycodeWrapper->encode($part);
    }

    // Set the flag in both cases: leaving it untouched for punycoded input
    // would let a stale "true" from an earlier call without a matching
    // denormalize() decode a host that was punycoded to begin with.
    $this->isNormalized = !$punycoded;

    return \strtolower($part);
}
||
| 324 | |||
/**
 * Converts a normalized part back to IDNA and clears the normalized flag.
 *
 * When the flag is not set, the part is returned unchanged.
 *
 * @param string $part Host part
 *
 * @return string Denormalized host part
 */
protected function denormalize(string $part): string
{
    if ($this->isNormalized !== true) {
        return $part;
    }

    $decoded = $this->punycodeWrapper->decode($part);
    $this->isNormalized = false;

    return $decoded;
}
||
| 342 | |||
/**
 * Tests host for presence of '.'.
 *
 * Related to #22
 *
 * A falsy host (null, empty string, '0') is never multi-label.
 *
 * @param string|null $host Host part of url
 *
 * @return bool True if multi-label domain, false otherwise
 */
protected function isMultiLabelDomain($host): bool
{
    return $host && \strpos($host, '.') !== false;
}
||
| 360 | |||
/**
 * Tests host to determine if it is an IPv4 address.
 *
 * Related to #43
 *
 * The host is matched against self::IP_ADDRESS_PATTERN.
 *
 * @param string $host Host part of url
 *
 * @return bool True if host is an ip address, false otherwise
 */
protected function isIpv4Address(string $host): bool
{
    $matchCount = \preg_match(self::IP_ADDRESS_PATTERN, $host);

    return 1 === $matchCount;
}
||
| 374 | } |
||
| 375 |
In PHP, under loose comparison (like `==`, or `!=`, or `switch` conditions), values of different types might be equal.
For `string` values, the empty string `''` is a special case; in particular, the following results might be unexpected: