Completed
Branch 2.0-dev (d250b8)
by Jan-Petter
03:02
created

UrlParser::urlEncode()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 25
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 25
rs 8.8571
cc 1
eloc 22
nc 1
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
6
/**
 * Trait UrlParser
 *
 * URL helpers: percent-encoding, validation of absolute URLs, and
 * resolution of root-relative paths against a base URL.
 *
 * @package vipnytt\RobotsTxtParser\Parser
 */
trait UrlParser
{
    /**
     * Convert relative to full URL
     *
     * The URL is percent-encoded first. If the encoded value is already a
     * valid absolute URL it is returned as-is; if it starts with a slash it
     * is appended to the base (scheme://host[:port]) of $base.
     *
     * @param string $url  Absolute URL or root-relative path
     * @param string $base Absolute URL the relative path is resolved against
     * @return string
     * @throws ClientException if $url is neither a valid URL nor a root-relative path,
     *                         or if $base is needed and is not a valid URL
     */
    private function urlConvertToFull($url, $base)
    {
        $url = $this->urlEncode($url);
        if ($this->urlValidate($url)) {
            return $url;
        }
        // After encoding the string is plain ASCII, so a byte-wise prefix check is sufficient.
        if (strpos($url, '/') === 0) {
            return $this->urlBase($base) . $url;
        }
        throw new ClientException('Invalid URL');
    }

    /**
     * URL encoder according to RFC 3986
     * Returns a string containing the encoded URL with disallowed characters converted to their percentage encodings.
     *
     * The whole string is passed through rawurlencode() and the escapes of
     * the RFC 3986 reserved characters (plus "%") are then turned back into
     * the literal characters, so URL delimiters and percent-escapes already
     * present in the input come out unchanged.
     *
     * @link http://publicmind.in/blog/url-encoding/
     *
     * @param string $url
     * @return string
     */
    protected function urlEncode($url)
    {
        // Literal character => case-insensitive pattern matching its percent-escape.
        // "%" must stay last: restoring it earlier would let the following
        // patterns decode escapes that were already present in the input.
        $reserved = [
            ":" => '!%3A!ui',
            "/" => '!%2F!ui',
            "?" => '!%3F!ui',
            "#" => '!%23!ui',
            "[" => '!%5B!ui',
            "]" => '!%5D!ui',
            "@" => '!%40!ui',
            "!" => '!%21!ui',
            "$" => '!%24!ui',
            "&" => '!%26!ui',
            "'" => '!%27!ui',
            "(" => '!%28!ui',
            ")" => '!%29!ui',
            "*" => '!%2A!ui',
            "+" => '!%2B!ui',
            "," => '!%2C!ui',
            ";" => '!%3B!ui',
            "=" => '!%3D!ui',
            "%" => '!%25!ui'
        ];
        return preg_replace(array_values($reserved), array_keys($reserved), rawurlencode($url));
    }

    /**
     * Validate URL
     *
     * A URL is accepted when it passes FILTER_VALIDATE_URL, has both a scheme
     * and a host component, the host is a valid host name and the scheme is
     * one of the accepted schemes.
     *
     * @param string $url
     * @return bool
     */
    private function urlValidate($url)
    {
        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }
        $parsed = parse_url($url);
        // FILTER_VALIDATE_URL also accepts host-less URLs (e.g. "mailto:"),
        // so the components have to be checked before they are read.
        if ($parsed === false || !isset($parsed['host'], $parsed['scheme'])) {
            return false;
        }
        return self::urlValidateHost($parsed['host'])
            && self::urlValidateScheme($parsed['scheme']);
    }

    /**
     * Validate host name
     *
     * @link http://stackoverflow.com/questions/1755144/how-to-validate-domain-name-in-php
     *
     * @param  string $host
     * @return bool True for a syntactically valid host name that is not an IP address
     */
    private static function urlValidateHost($host)
    {
        return (
            preg_match("/^([a-z\d](-*[a-z\d])*)(\.([a-z\d](-*[a-z\d])*))*$/i", $host) //valid chars check
            && preg_match("/^.{1,253}$/", $host) //overall length check
            && preg_match("/^[^\.]{1,63}(\.[^\.]{1,63})*$/", $host) //length of each label
            && !filter_var($host, FILTER_VALIDATE_IP) //is not an IP address
        );
    }

    /**
     * Validate URL scheme
     *
     * The comparison is case-insensitive.
     *
     * @param  string $scheme
     * @return bool
     */
    private static function urlValidateScheme($scheme)
    {
        return in_array(
            mb_strtolower($scheme),
            [
                'http',
                'https',
                'ftp',
                'sftp',
                'ftps',
            ],
            true
        );
    }

    /**
     * Base URL
     *
     * Returns "scheme://host:port" for the given URL. An explicit port in the
     * URL always wins; otherwise the default port of the scheme is used. If
     * no port can be determined, the ":port" suffix is left out instead of
     * producing a dangling colon.
     *
     * @param string $url
     * @return string
     * @throws ClientException if $url is not a valid URL
     */
    protected function urlBase($url)
    {
        if (!$this->urlValidate($url)) {
            throw new ClientException('Invalid URL');
        }
        // Validation guarantees that scheme and host are present.
        $parsed = parse_url($url);
        $scheme = $parsed['scheme'];
        $base = $scheme . '://' . $parsed['host'];
        if (isset($parsed['port'])) {
            return $base . ':' . $parsed['port'];
        }
        // Default ports of the accepted schemes. A fixed table keeps the
        // result independent of the host's services database, which may be
        // missing or may map "sftp" to the unrelated Simple FTP service.
        $defaultPorts = [
            'http' => 80,
            'https' => 443,
            'ftp' => 21,
            'sftp' => 22,
            'ftps' => 990,
        ];
        $lookup = mb_strtolower($scheme);
        $port = isset($defaultPorts[$lookup]) ? $defaultPorts[$lookup] : getservbyname($lookup, 'tcp');
        return $port === false ? $base : $base . ':' . $port;
    }
}
138