UriParser::base()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 9.8666
c 0
b 0
f 0
cc 3
nc 3
nop 0
1
<?php
2
/**
3
 * vipnytt/RobotsTxtParser
4
 *
5
 * @link https://github.com/VIPnytt/RobotsTxtParser
6
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
7
 */
8
9
namespace vipnytt\RobotsTxtParser\Parser;
10
11
class UriParser
{
    /**
     * Scheme white-list
     * @var string[]
     */
    protected $schemes = [
        'http',
        'https',
        'ftp',
        'ftps',
        'sftp',
    ];

    /**
     * URI being processed. Rewritten in place by encode() and convertToFull().
     * @var string
     */
    private $uri;

    /**
     * UriParser constructor.
     *
     * @param string $uri Absolute URI, or a path starting with `/`
     */
    public function __construct($uri)
    {
        $this->uri = $uri;
    }

    /**
     * Convert relative to full
     *
     * Encodes the URI, then returns it unchanged when it is already a valid absolute URI.
     * A URI starting with `/` is resolved against the base (scheme, host and port) of
     * $fallbackBase. After a successful call the instance holds the returned URI.
     *
     * @param string $fallbackBase Absolute URI supplying the base for relative paths
     * @return string
     * @throws \InvalidArgumentException When the URI is neither valid nor a rooted path,
     *                                   or when $fallbackBase itself is invalid
     */
    public function convertToFull($fallbackBase)
    {
        $this->encode();
        if ($this->validate()) {
            return $this->uri;
        }
        if (strpos($this->uri, '/') !== 0) {
            throw new \InvalidArgumentException("Invalid URI `$this->uri`");
        }
        $relative = $this->uri;
        // base() works on $this->uri, so the fallback is swapped in temporarily.
        $this->uri = $fallbackBase;
        try {
            $base = $this->base();
        } catch (\InvalidArgumentException $exception) {
            // Do not leave the instance pointing at the rejected fallback.
            $this->uri = $relative;
            throw $exception;
        }
        return $this->uri = $base . $relative;
    }

    /**
     * URI encoder according to RFC 3986
     * Returns a string containing the encoded URI with disallowed characters converted to their percentage encodings.
     * The stored URI is replaced by the encoded form, with scheme and host lowercased.
     * @link http://publicmind.in/blog/url-encoding/
     *
     * @return string
     */
    public function encode()
    {
        $reserved = [
            '!%21!ui' => "!",
            '!%23!ui' => "#",
            '!%24!ui' => "$",
            '!%26!ui' => "&",
            '!%27!ui' => "'",
            '!%28!ui' => "(",
            '!%29!ui' => ")",
            '!%2A!ui' => "*",
            '!%2B!ui' => "+",
            '!%2C!ui' => ",",
            '!%2F!ui' => "/",
            '!%3A!ui' => ":",
            '!%3B!ui' => ";",
            '!%3D!ui' => "=",
            '!%3F!ui' => "?",
            '!%40!ui' => "@",
            '!%5B!ui' => "[",
            '!%5D!ui' => "]",
            '!%25!ui' => "%",
        ];
        // The % character must be the last in the $reserved array.
        // This makes sure that the already encoded values are not lost or encoded again.
        $this->uri = preg_replace(array_keys($reserved), array_values($reserved), rawurlencode($this->uri));
        return $this->baseToLowercase();
    }

    /**
     * Base uri to lowercase
     *
     * Lowercases the scheme and the host only. Userinfo (`user:pass@`) is case-sensitive
     * and is left untouched, as is everything after the host.
     *
     * @return string
     */
    private function baseToLowercase()
    {
        $host = parse_url($this->uri, PHP_URL_HOST);
        // parse_url() yields null when there is no host and false on a malformed URI.
        if (!is_string($host) || $host === '') {
            return $this->uri;
        }
        // Anchor the host behind the optional scheme, `//` and userinfo, so a host that
        // also occurs earlier in the string (e.g. inside the scheme) is not mistaken for it.
        $pattern = '!^((?:[a-z][a-z\d+.-]*:)?(?://)?)((?:[^/?#]*@)?)' . preg_quote($host, '!') . '!i';
        if (!preg_match($pattern, $this->uri, $match)) {
            return $this->uri;
        }
        return $this->uri = strtolower($match[1]) . $match[2] . strtolower($host)
            . substr($this->uri, strlen($match[0]));
    }

    /**
     * Validate
     *
     * The URI is valid when it has a white-listed scheme and a host that is either a
     * well-formed host name or an IP address.
     *
     * @return bool
     */
    public function validate()
    {
        $parsed = parse_url($this->uri);
        // Both components are mandatory; checking up front avoids undefined-index notices.
        if (!is_array($parsed) || !isset($parsed['host'], $parsed['scheme'])) {
            return false;
        }
        return (
            (
                filter_var($this->uri, FILTER_VALIDATE_URL) !== false ||
                // PHP 5.x bug fix: FILTER_VALIDATE_URL doesn't support IPv6 urls. IP check not needed in the future.
                $this->validateIP($parsed['host'])
            ) &&
            (
                $this->validateHost($parsed['host']) ||
                $this->validateIP($parsed['host'])
            ) &&
            $this->validateScheme($parsed['scheme'])
        );
    }

    /**
     * Validate IPv4 or IPv6
     *
     * @param  string|null $ipAddress Address to check; defaults to the host of the stored URI.
     *                                IPv6 addresses may be wrapped in square brackets.
     * @return bool
     */
    public function validateIP($ipAddress = null)
    {
        if ($ipAddress === null) {
            $ipAddress = $this->component('host');
        }
        if ($ipAddress === null) {
            // The stored URI has no host, so there is nothing to validate.
            return false;
        }
        return (
            filter_var($ipAddress, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false ||
            filter_var(trim($ipAddress, '[]'), FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false
        );
    }

    /**
     * Validate host name
     *
     * @link http://stackoverflow.com/questions/1755144/how-to-validate-domain-name-in-php
     *
     * @param  string|null $host Host name to check; defaults to the host of the stored URI,
     *                           or to its path when no host is present (a bare host name
     *                           is parsed as a path)
     * @return bool
     */
    public function validateHost($host = null)
    {
        if ($host === null) {
            $host = $this->component('host');
            if ($host === null) {
                $host = $this->component('path');
            }
        }
        if ($host === null) {
            return false;
        }
        return (
            // The D modifier stops `$` from matching before a trailing newline.
            preg_match('/^([a-z\d](-*[a-z\d])*)(\.([a-z\d](-*[a-z\d])*))*$/iD', $host) //valid chars check
            && preg_match('/^.{1,253}$/', $host) //overall length check
            && preg_match('/^[^\.]{1,63}(\.[^\.]{1,63})*$/', $host) //length of each label
            && !$this->validateIP($host)
        );
    }

    /**
     * Validate scheme
     *
     * @param  string|null $scheme Scheme to check; defaults to the scheme of the stored URI
     * @return bool True when the scheme is on the white-list
     */
    public function validateScheme($scheme = null)
    {
        if ($scheme === null) {
            $scheme = $this->component('scheme');
        }
        return in_array($scheme, $this->schemes, true);
    }

    /**
     * Base
     *
     * Builds `scheme://host:port` in lowercase. An explicit port in the URI wins;
     * otherwise the default port of the scheme is used.
     *
     * @return string
     * @throws \InvalidArgumentException When the stored URI is not valid
     */
    public function base()
    {
        if (!$this->validate()) {
            throw new \InvalidArgumentException("Invalid URI: $this->uri");
        }
        // validate() has established that scheme and host are present.
        $parsed = parse_url($this->uri);
        $port = isset($parsed['port']) ? $parsed['port'] : $this->defaultPort($parsed['scheme']);
        $base = $parsed['scheme'] . '://' . $parsed['host'];
        if ($port !== null) {
            $base .= ':' . $port;
        }
        return strtolower($base);
    }

    /**
     * Strip fragment
     *
     * @return string The stored URI up to, but not including, the first `#`
     */
    public function stripFragment()
    {
        return explode('#', $this->uri, 2)[0];
    }

    /**
     * Fetch a single component of the stored URI.
     *
     * @param string $name Key as returned by parse_url(), e.g. `host`, `scheme`, `path`
     * @return string|int|null The component, or null when absent or the URI is malformed
     */
    private function component($name)
    {
        $parsed = parse_url($this->uri);
        return (is_array($parsed) && isset($parsed[$name])) ? $parsed[$name] : null;
    }

    /**
     * Default TCP port of a scheme.
     *
     * @param string $scheme
     * @return int|null Port number, or null when it cannot be determined
     */
    private function defaultPort($scheme)
    {
        $port = getservbyname($scheme, 'tcp');
        if ($port !== false) {
            return $port;
        }
        // getservbyname() returns false when the system has no entry for the service.
        // These are the IANA service-registry values (sftp is 115/tcp there, not SSH's 22),
        // so the result does not depend on which lookup path was taken.
        $fallback = [
            'http' => 80,
            'https' => 443,
            'ftp' => 21,
            'ftps' => 990,
            'sftp' => 115,
        ];
        return isset($fallback[$scheme]) ? $fallback[$scheme] : null;
    }
}
215