Completed
Push — master ( c07126...706ec6 )
by Jan-Petter
04:09
created

UriParser::validateHost()   B

Complexity

Conditions 6
Paths 12

Size

Total Lines 13
Code Lines 9

Duplication

Lines 4
Ratio 30.77 %

Importance

Changes 1
Bugs 1 Features 1
Metric Value
c 1
b 1
f 1
dl 4
loc 13
rs 8.8571
cc 6
eloc 9
nc 12
nop 1
1
<?php
2
namespace vipnytt\RobotsTxtParser\Parser;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
6
/**
 * Normalises, validates and resolves a single URI.
 *
 * The instance wraps one URI string; `encode()` and `convertToFull()` rewrite
 * that stored string in place, the `validate*()` methods only read it.
 */
class UriParser
{
    /**
     * URI
     * @var string
     */
    private $uri;

    /**
     * UriParser constructor.
     *
     * @param string $uri URI to work on; stored as given, nothing is validated here.
     */
    public function __construct($uri)
    {
        $this->uri = $uri;
    }

    /**
     * Convert relative to full
     *
     * The stored URI is encoded first. If it then validates as an absolute
     * URI it is returned unchanged. If it starts with "/" it is appended to
     * the base (scheme://host[:port]) of $fallbackBase.
     *
     * Side effect: in the relative case the stored URI is replaced by
     * $fallbackBase, so subsequent calls on this instance operate on the
     * fallback base rather than on the original relative URI.
     *
     * @param string $fallbackBase Absolute URI supplying scheme, host and port
     * @return string
     * @throws ClientException When the URI is neither absolute nor root-relative,
     *                         or when $fallbackBase itself is not a valid URI
     */
    public function convertToFull($fallbackBase)
    {
        $this->encode();
        if ($this->validate()) {
            return $this->uri;
        }
        if (strpos($this->uri, '/') === 0) {
            $relative = $this->uri;
            $this->uri = $fallbackBase;
            return $this->base() . $relative;
        }
        throw new ClientException("Invalid URI `$this->uri`");
    }

    /**
     * URI encoder according to RFC 3986
     * Returns a string containing the encoded URI with disallowed characters converted to their percentage encodings.
     * @link http://publicmind.in/blog/url-encoding/
     *
     * The stored URI is replaced by the encoded form, and everything up to
     * the end of the host is lower-cased.
     *
     * @return string
     */
    public function encode()
    {
        // rawurlencode() escapes every reserved character (and "%") using
        // upper-case hex; this table turns exactly those escapes back into
        // their literal characters.
        $reserved = [
            '%21' => '!',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2F' => '/',
            '%3A' => ':',
            '%3B' => ';',
            '%3D' => '=',
            '%3F' => '?',
            '%40' => '@',
            '%5B' => '[',
            '%5D' => ']',
        ];
        // strtr() substitutes in a single pass and never re-scans its own
        // output. Sequential replacement would first turn "%252F" into "%2F"
        // and then into "/", silently decoding escapes that were already
        // present in the input (and only those listed after "%25").
        $this->uri = strtr(rawurlencode($this->uri), $reserved);
        return $this->baseToLowercase();
    }

    /**
     * Base uri to lowercase
     *
     * Lower-cases the stored URI from its start up to the end of the host
     * component; path, query and fragment keep their case. The URI is left
     * untouched when it has no host.
     *
     * @return string
     */
    private function baseToLowercase()
    {
        $host = parse_url($this->uri, PHP_URL_HOST);
        // parse_url() yields null for a missing host and false for a URI it
        // cannot parse at all; neither gives us anything to lower-case.
        if (!is_string($host) || $host === '') {
            return $this->uri;
        }
        // Search for the host only behind the "//" authority marker, so a
        // host that also occurs inside the scheme ("Http://H/") is not
        // matched at the wrong position.
        $marker = strpos($this->uri, '//');
        $hostStart = strpos($this->uri, $host, $marker === false ? 0 : $marker + 2);
        if ($hostStart === false) {
            return $this->uri;
        }
        $end = $hostStart + strlen($host);
        return $this->uri = substr_replace($this->uri, strtolower(substr($this->uri, 0, $end)), 0, $end);
    }

    /**
     * Validate
     *
     * The stored URI is valid when it has both a scheme and a host, the
     * scheme is one of the supported ones, and the host is either a
     * well-formed host name or an IPv4/IPv6 address.
     *
     * @return bool
     */
    public function validate()
    {
        $parsed = parse_url($this->uri);
        // Without an explicit host and scheme there is nothing to validate.
        // Bailing out here also keeps null from reaching the validators
        // below, where it would mean "fall back to the stored URI".
        if (!is_array($parsed) || !isset($parsed['host'], $parsed['scheme'])) {
            return false;
        }
        $host = $parsed['host'];
        return (
            (
                filter_var($this->uri, FILTER_VALIDATE_URL) !== false ||
                // PHP 5.x bug fix: FILTER_VALIDATE_URL doesn't support IPv6 urls. IP check not needed in the future.
                $this->validateIP($host)
            ) &&
            (
                $this->validateHost($host) ||
                $this->validateIP($host)
            ) &&
            $this->validateScheme($parsed['scheme'])
        );
    }

    /**
     * Validate IPv4 or IPv6
     *
     * IPv6 addresses may be wrapped in square brackets.
     *
     * @param  string|null $ipAddress Address to check; null checks the host
     *                                (or, failing that, the path) of the stored URI
     * @return bool
     */
    public function validateIP($ipAddress = null)
    {
        if ($ipAddress === null) {
            $ipAddress = $this->hostOrPath();
        }
        return (
            filter_var($ipAddress, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false ||
            filter_var(trim($ipAddress, '[]'), FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false
        );
    }

    /**
     * Validate host name
     *
     * @link http://stackoverflow.com/questions/1755144/how-to-validate-domain-name-in-php
     *
     * @param  string|null $host Host name to check; null checks the host
     *                           (or, failing that, the path) of the stored URI
     * @return bool
     */
    public function validateHost($host = null)
    {
        if ($host === null) {
            $host = $this->hostOrPath();
        }
        // The D modifier makes "$" match only at the very end of the subject;
        // without it a host followed by a newline would pass all three checks.
        return (
            preg_match('/^([a-z\d](-*[a-z\d])*)(\.([a-z\d](-*[a-z\d])*))*$/iD', $host) //valid chars check
            && preg_match('/^.{1,253}$/D', $host) //overall length check
            && preg_match('/^[^\.]{1,63}(\.[^\.]{1,63})*$/D', $host) //length of each label
            && !$this->validateIP($host)
        );
    }

    /**
     * Validate scheme
     *
     * @param  string|null $scheme Scheme to check; null checks the scheme of the stored URI
     * @return bool
     */
    public function validateScheme($scheme = null)
    {
        if ($scheme === null) {
            // Yields null (no scheme) or false (unparsable); both fail the
            // strict comparison below.
            $scheme = parse_url($this->uri, PHP_URL_SCHEME);
        }
        return in_array($scheme, [
            'http',
            'https',
            'ftp',
            'ftps',
            'sftp',
        ], true);
    }

    /**
     * Base
     *
     * Builds the lower-cased "scheme://host:port" prefix of the stored URI.
     * When the URI carries no port, the scheme's TCP port is looked up with
     * getservbyname(); if that lookup fails too, the port is left out
     * instead of emitting a dangling ":".
     *
     * @return string
     * @throws ClientException When the stored URI does not validate
     */
    public function base()
    {
        if (!$this->validate()) {
            throw new ClientException('Invalid URI');
        }
        // validate() guarantees that scheme and host are present.
        $parsed = parse_url($this->uri);
        $port = isset($parsed['port']) ? $parsed['port'] : getservbyname($parsed['scheme'], 'tcp');
        $base = $parsed['scheme'] . '://' . $parsed['host'];
        if ($port !== false) {
            $base .= ':' . $port;
        }
        return strtolower($base);
    }

    /**
     * Host of the stored URI, or its path when it has no host
     *
     * Shared fallback for the validators that accept a null argument.
     *
     * @return string Empty string when the URI has neither host nor path
     *                or cannot be parsed
     */
    private function hostOrPath()
    {
        $parsed = parse_url($this->uri);
        if (!is_array($parsed)) {
            return '';
        }
        if (isset($parsed['host'])) {
            return $parsed['host'];
        }
        return isset($parsed['path']) ? $parsed['path'] : '';
    }
}
195