Completed
Push — master ( 3abe52...1cb1ee )
by Jan-Petter
02:13
created

UrlTools::urlConvertToFull()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 10
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 10
rs 9.4285
cc 3
eloc 7
nc 3
nop 2
1
<?php
2
namespace vipnytt\RobotsTxtParser\Modules;
3
4
use vipnytt\RobotsTxtParser\Exceptions\ClientException;
5
6
/**
 * Trait UrlTools
 *
 * URL helpers: percent-encoding, validation of absolute URLs (host and scheme),
 * extraction of the "scheme://host:port" base, and conversion of root-relative
 * references into full URLs.
 *
 * @package vipnytt\RobotsTxtParser\Modules
 */
trait UrlTools
{
    /**
     * Convert relative to full URL
     *
     * The URL is percent-encoded first. If the result is already a valid
     * absolute URL it is returned as is; if it is a root-relative reference
     * (starts with "/") it is appended to the base of $base.
     *
     * @param string $url Absolute URL or root-relative reference
     * @param string $base Absolute URL the reference is resolved against
     * @return string
     * @throws ClientException If $url is neither a valid absolute URL nor root-relative,
     *                         or if $base is needed and is itself not a valid URL
     */
    protected function urlConvertToFull($url, $base)
    {
        $url = $this->urlEncode($url);
        if ($this->urlValidate($url)) {
            return $url;
        }
        // Plain byte-wise prefix test; "/" is a single byte, no multibyte search needed.
        if (strncmp($url, '/', 1) === 0) {
            return $this->urlBase($base) . $url;
        }
        throw new ClientException('Invalid URL');
    }

    /**
     * URL encoder according to RFC 3986
     * Returns a string containing the encoded URL with disallowed characters converted to their percentage encodings.
     * Reserved characters and existing percent signs are left untouched.
     * @link http://publicmind.in/blog/url-encoding/
     *
     * @param string $url
     * @return string
     */
    protected function urlEncode($url)
    {
        // Percent-encoded form => reserved character it is turned back into.
        // "%25" MUST stay last: rawurlencode() turns an already encoded "%3A" into
        // "%253A", and restoring "%" only at the very end brings it back to "%3A"
        // instead of decoding it to ":".
        $reserved = [
            '%3A' => ':',
            '%2F' => '/',
            '%3F' => '?',
            '%23' => '#',
            '%5B' => '[',
            '%5D' => ']',
            '%40' => '@',
            '%21' => '!',
            '%24' => '$',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%3B' => ';',
            '%3D' => '=',
            '%25' => '%',
        ];
        // rawurlencode() always emits upper-case hex digits, so a case-sensitive
        // replacement is sufficient. Arrays are applied in order by str_replace().
        return str_replace(array_keys($reserved), array_values($reserved), rawurlencode($url));
    }

    /**
     * Validate URL
     *
     * A URL is accepted when it passes FILTER_VALIDATE_URL, can be parsed, and
     * has both a host and a scheme that pass the dedicated validators.
     *
     * @param string $url
     * @return bool
     */
    protected function urlValidate($url)
    {
        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }
        $parsed = parse_url($url);
        return (
            $parsed !== false &&
            // Host-less URLs (e.g. "mailto:") pass FILTER_VALIDATE_URL; reject them
            // here instead of reading an undefined array key.
            isset($parsed['host'], $parsed['scheme']) &&
            static::urlValidateHost($parsed['host']) &&
            static::urlValidateScheme($parsed['scheme'])
        );
    }

    /**
     * Validate host name
     *
     * Host names are compared case-insensitively. IP addresses are rejected.
     *
     * @link http://stackoverflow.com/questions/1755144/how-to-validate-domain-name-in-php
     *
     * @param  string $host
     * @return bool
     */
    protected static function urlValidateHost($host)
    {
        if (!is_string($host)) {
            return false;
        }
        // The patterns below only know lower-case letters.
        $host = mb_strtolower($host);
        return (
            mb_ereg_match('^([a-z\d](-*[a-z\d])*)(\.([a-z\d](-*[a-z\d])*))*$', $host) && //valid chars check
            mb_ereg_match('^.{1,253}$', $host) && //overall length check
            mb_ereg_match('^[^\.]{1,63}(\.[^\.]{1,63})*$', $host) && //length of each label
            !filter_var($host, FILTER_VALIDATE_IP) //is not an IP address
        );
    }

    /**
     * Validate URL scheme
     *
     * Accepts http, https, ftp and sftp, compared case-insensitively.
     *
     * @param  string $scheme
     * @return bool
     */
    protected static function urlValidateScheme($scheme)
    {
        // Strict comparison: a loose in_array() lets non-string values such as 0
        // match on PHP versions before 8.
        return is_string($scheme) && in_array(mb_strtolower($scheme), [
            'http',
            'https',
            'ftp',
            'sftp',
        ], true);
    }

    /**
     * Base URL
     *
     * Returns "scheme://host:port" with scheme and host lower-cased. When the URL
     * carries no explicit port, the scheme's default port is used.
     *
     * @param string $url
     * @return string
     * @throws ClientException If $url is not a valid URL
     */
    protected function urlBase($url)
    {
        if ($this->urlValidate($url) === false) {
            throw new ClientException('Invalid URL');
        }
        $parsed = parse_url($url);
        $scheme = mb_strtolower($parsed['scheme']);
        $host = mb_strtolower($parsed['host']);
        if (isset($parsed['port'])) {
            $port = $parsed['port'];
        } else {
            // Built-in defaults first: they do not depend on the system services
            // database, which may be missing or may map "sftp" to 115 (Simple FTP)
            // instead of the SSH port.
            $defaultPorts = [
                'http' => 80,
                'https' => 443,
                'ftp' => 21,
                'sftp' => 22,
            ];
            $port = isset($defaultPorts[$scheme]) ? $defaultPorts[$scheme] : getservbyname($scheme, 'tcp');
        }
        $base = $scheme . '://' . $host;
        // getservbyname() returns false for unknown services; leave the port out
        // rather than emitting a dangling ":".
        return $port === false ? $base : $base . ':' . $port;
    }
}
140