| @@ 11-136 (lines=126) @@ | ||
| 8 | * |
|
| 9 | * @package vipnytt\RobotsTxtParser\Modules |
|
| 10 | */ |
|
trait UrlTools
{
    /**
     * Convert a relative URL to a full URL.
     *
     * The URL is percent-encoded first. If the encoded result is already a
     * valid absolute URL it is returned as is; if it starts with a slash it
     * is appended to the base (scheme, host and port) of $base.
     *
     * @param string $url Absolute URL, or a path beginning with "/"
     * @param string $base Absolute URL the relative path is resolved against
     * @return string
     * @throws ClientException if $url is neither a valid absolute URL nor a
     *                         path starting with "/", or if $base is invalid
     */
    protected function urlConvertToFull($url, $base)
    {
        $url = $this->urlEncode($url);
        if ($this->urlValidate($url)) {
            return $url;
        }
        // The encoded URL is pure ASCII, so a byte-wise prefix check is enough.
        if (strpos($url, '/') === 0) {
            return $this->urlBase($base) . $url;
        }
        throw new ClientException('Invalid URL');
    }

    /**
     * URL encoder according to RFC 3986
     * Returns a string containing the encoded URL with disallowed characters
     * converted to their percentage encodings. Reserved characters and
     * existing percent signs are left untouched.
     * @link http://publicmind.in/blog/url-encoding/
     *
     * @param string $url
     * @return string
     */
    protected function urlEncode($url)
    {
        // rawurlencode() escapes everything, including the reserved set, and
        // always emits upper-case hex, so a literal replacement restores the
        // reserved characters. "%25" has to stay last: restoring "%" earlier
        // would let a later entry decode sequences that were already encoded
        // in the input (e.g. "%3A" -> "%253A" -> "%3A" -> ":").
        $reserved = [
            '%3A' => ':',
            '%2F' => '/',
            '%3F' => '?',
            '%23' => '#',
            '%5B' => '[',
            '%5D' => ']',
            '%40' => '@',
            '%21' => '!',
            '%24' => '$',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%3B' => ';',
            '%3D' => '=',
            '%25' => '%',
        ];
        return str_replace(array_keys($reserved), array_values($reserved), rawurlencode($url));
    }

    /**
     * Validate URL
     *
     * A URL is valid when it passes FILTER_VALIDATE_URL, can be parsed, and
     * carries both an accepted scheme and an accepted host name.
     *
     * @param string $url
     * @return bool
     */
    protected function urlValidate($url)
    {
        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }
        $parsed = parse_url($url);
        // FILTER_VALIDATE_URL accepts host-less URLs such as "mailto:" or
        // "file:", so the components must be checked before they are read.
        if ($parsed === false || !isset($parsed['host'], $parsed['scheme'])) {
            return false;
        }
        return static::urlValidateHost($parsed['host'])
            && static::urlValidateScheme($parsed['scheme']);
    }

    /**
     * Validate host name
     *
     * @link http://stackoverflow.com/questions/1755144/how-to-validate-domain-name-in-php
     *
     * @param string $host
     * @return bool
     */
    protected static function urlValidateHost($host)
    {
        if (!is_string($host)) {
            return false;
        }
        $length = strlen($host);
        // The "D" modifier makes "$" match only at the very end, so a host
        // with a trailing newline is rejected.
        return (
            $length >= 1 && $length <= 253 //overall length check
            && preg_match('/^([a-z\d](-*[a-z\d])*)(\.([a-z\d](-*[a-z\d])*))*$/iD', $host) === 1 //valid chars check
            && preg_match('/^[^\.]{1,63}(\.[^\.]{1,63})*$/D', $host) === 1 //length of each label
            && filter_var($host, FILTER_VALIDATE_IP) === false //is not an IP address
        );
    }

    /**
     * Validate URL scheme
     *
     * @param string $scheme
     * @return bool true for "http", "https", "ftp" and "sftp" (exact match)
     */
    protected static function urlValidateScheme($scheme)
    {
        return in_array($scheme, ['http', 'https', 'ftp', 'sftp'], true);
    }

    /**
     * Base URL
     *
     * Builds "scheme://host:port" from a valid URL. When the URL carries no
     * explicit port, the port registered for the scheme is looked up with
     * getservbyname(); if that lookup fails the port is left out rather than
     * producing a dangling ":".
     *
     * @param string $url
     * @return string
     * @throws ClientException if $url does not pass urlValidate()
     */
    protected function urlBase($url)
    {
        if ($this->urlValidate($url) === false) {
            throw new ClientException('Invalid URL');
        }
        $parsed = parse_url($url);
        $origin = $parsed['scheme'] . '://' . $parsed['host'];
        $port = isset($parsed['port']) ? $parsed['port'] : getservbyname($parsed['scheme'], 'tcp');
        if ($port === false) {
            // Scheme unknown to the local services database.
            return $origin;
        }
        return $origin . ':' . $port;
    }
}
|
| 137 | ||
| @@ 11-136 (lines=126) @@ | ||
| 8 | * |
|
| 9 | * @package vipnytt\RobotsTxtParser\Parser |
|
| 10 | */ |
|
trait UrlParser
{
    /**
     * Convert a relative URL to a full URL.
     *
     * The URL is percent-encoded first. If the encoded result is already a
     * valid absolute URL it is returned as is; if it starts with a slash it
     * is appended to the base (scheme, host and port) of $base.
     *
     * @param string $url Absolute URL, or a path beginning with "/"
     * @param string $base Absolute URL the relative path is resolved against
     * @return string
     * @throws ClientException if $url is neither a valid absolute URL nor a
     *                         path starting with "/", or if $base is invalid
     */
    protected function urlConvertToFull($url, $base)
    {
        $url = $this->urlEncode($url);
        if ($this->urlValidate($url)) {
            return $url;
        }
        // The encoded URL is pure ASCII, so a byte-wise prefix check is enough.
        if (strpos($url, '/') === 0) {
            return $this->urlBase($base) . $url;
        }
        throw new ClientException('Invalid URL');
    }

    /**
     * URL encoder according to RFC 3986
     * Returns a string containing the encoded URL with disallowed characters
     * converted to their percentage encodings. Reserved characters and
     * existing percent signs are left untouched.
     * @link http://publicmind.in/blog/url-encoding/
     *
     * @param string $url
     * @return string
     */
    protected function urlEncode($url)
    {
        // rawurlencode() escapes everything, including the reserved set, and
        // always emits upper-case hex, so a literal replacement restores the
        // reserved characters. "%25" has to stay last: restoring "%" earlier
        // would let a later entry decode sequences that were already encoded
        // in the input (e.g. "%3A" -> "%253A" -> "%3A" -> ":").
        $reserved = [
            '%3A' => ':',
            '%2F' => '/',
            '%3F' => '?',
            '%23' => '#',
            '%5B' => '[',
            '%5D' => ']',
            '%40' => '@',
            '%21' => '!',
            '%24' => '$',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%3B' => ';',
            '%3D' => '=',
            '%25' => '%',
        ];
        return str_replace(array_keys($reserved), array_values($reserved), rawurlencode($url));
    }

    /**
     * Validate URL
     *
     * A URL is valid when it passes FILTER_VALIDATE_URL, can be parsed, and
     * carries both an accepted scheme and an accepted host name.
     *
     * @param string $url
     * @return bool
     */
    protected function urlValidate($url)
    {
        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }
        $parsed = parse_url($url);
        // FILTER_VALIDATE_URL accepts host-less URLs such as "mailto:" or
        // "file:", so the components must be checked before they are read.
        if ($parsed === false || !isset($parsed['host'], $parsed['scheme'])) {
            return false;
        }
        return static::urlValidateHost($parsed['host'])
            && static::urlValidateScheme($parsed['scheme']);
    }

    /**
     * Validate host name
     *
     * @link http://stackoverflow.com/questions/1755144/how-to-validate-domain-name-in-php
     *
     * @param string $host
     * @return bool
     */
    protected static function urlValidateHost($host)
    {
        if (!is_string($host)) {
            return false;
        }
        $length = strlen($host);
        // The "D" modifier makes "$" match only at the very end, so a host
        // with a trailing newline is rejected.
        return (
            $length >= 1 && $length <= 253 //overall length check
            && preg_match('/^([a-z\d](-*[a-z\d])*)(\.([a-z\d](-*[a-z\d])*))*$/iD', $host) === 1 //valid chars check
            && preg_match('/^[^\.]{1,63}(\.[^\.]{1,63})*$/D', $host) === 1 //length of each label
            && filter_var($host, FILTER_VALIDATE_IP) === false //is not an IP address
        );
    }

    /**
     * Validate URL scheme
     *
     * @param string $scheme
     * @return bool true for "http", "https", "ftp" and "sftp" (exact match)
     */
    protected static function urlValidateScheme($scheme)
    {
        return in_array($scheme, ['http', 'https', 'ftp', 'sftp'], true);
    }

    /**
     * Base URL
     *
     * Builds "scheme://host:port" from a valid URL. When the URL carries no
     * explicit port, the port registered for the scheme is looked up with
     * getservbyname(); if that lookup fails the port is left out rather than
     * producing a dangling ":".
     *
     * @param string $url
     * @return string
     * @throws ClientException if $url does not pass urlValidate()
     */
    protected function urlBase($url)
    {
        if ($this->urlValidate($url) === false) {
            throw new ClientException('Invalid URL');
        }
        $parsed = parse_url($url);
        $origin = $parsed['scheme'] . '://' . $parsed['host'];
        $port = isset($parsed['port']) ? $parsed['port'] : getservbyname($parsed['scheme'], 'tcp');
        if ($port === false) {
            // Scheme unknown to the local services database.
            return $origin;
        }
        return $origin . ':' . $port;
    }
}
|
| 137 | ||