UriPattern   A
last analyzed

Complexity

Total Complexity 18

Size/Duplication

Total Lines 210
Duplicated Lines 0 %

Importance

Changes 8
Bugs 0 Features 0
Metric Value
eloc 75
c 8
b 0
f 0
dl 0
loc 210
rs 10
wmc 18

10 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 6 2
A getNamedPatterns() 0 9 4
A allowNonAscii() 0 3 1
A matchRelativeUri() 0 3 1
A matchUri() 0 7 2
A matchScheme() 0 3 1
A matchAbsoluteUri() 0 3 1
A matchHost() 0 3 1
A buildPatterns() 0 65 1
A match() 0 14 4
1
<?php
2
3
namespace Riimu\Kit\UrlParser;
4
5
/**
6
 * Provides PCRE based matching for URIs.
7
 * @author Riikka Kalliomäki <[email protected]>
8
 * @copyright Copyright (c) 2015-2017 Riikka Kalliomäki
9
 * @license http://opensource.org/licenses/mit-license.php MIT License
10
 */
11
class UriPattern
{
    /** @var string PCRE pattern that conforms to the URI ABNF */
    private static $absoluteUri;

    /** @var string PCRE pattern that conforms to the relative-ref ABNF */
    private static $relativeUri;

    /** @var string PCRE pattern that conforms to the scheme ABNF */
    private static $scheme;

    /** @var string PCRE pattern that conforms to the host ABNF */
    private static $host;

    /** @var bool Whether non ascii characters are allowed or not */
    private $allowNonAscii;

    /**
     * Creates a new instance of UriPattern.
     *
     * The PCRE patterns are stored in static properties, so they are built
     * only by the first instance that finds them unset.
     */
    public function __construct()
    {
        $this->allowNonAscii = false;

        if (!isset(self::$absoluteUri)) {
            self::buildPatterns();
        }
    }

    /**
     * Allows or forbids non ascii characters in some parts of the URI.
     *
     * When enabled, non ascii characters are allowed in `userinfo`, `reg_name`,
     * `path`, `query` and `fragment` parts of the URI. Note that pattern does
     * not verify whether the bytes actually form valid UTF-8 sequences or not.
     * Enabling this option simply allows bytes within the range of `x80-xFF`.
     *
     * @param bool $enabled True to allow, false to forbid
     */
    public function allowNonAscii($enabled = true)
    {
        $this->allowNonAscii = (bool) $enabled;
    }

    /**
     * Matches the string against URI or relative-ref ABNF.
     *
     * The absolute URI pattern is tried first and the relative-ref pattern is
     * used only if the absolute pattern does not match.
     *
     * @param string $uri The string to match
     * @param array $matches Provides the matched sub sections from the match
     * @return bool True if the URI matches, false if not
     */
    public function matchUri($uri, &$matches = [])
    {
        if ($this->matchAbsoluteUri($uri, $matches)) {
            return true;
        }

        return $this->matchRelativeUri($uri, $matches);
    }

    /**
     * Matches the string against the URI ABNF.
     * @param string $uri The string to match
     * @param array $matches Provides the matched sub sections from the match
     * @return bool True if the URI matches, false if not
     */
    public function matchAbsoluteUri($uri, &$matches = [])
    {
        return $this->match(self::$absoluteUri, $uri, $matches);
    }

    /**
     * Matches the string against the relative-ref ABNF.
     * @param string $uri The string to match
     * @param array $matches Provides the matched sub sections from the match
     * @return bool True if the URI matches, false if not
     */
    public function matchRelativeUri($uri, &$matches = [])
    {
        return $this->match(self::$relativeUri, $uri, $matches);
    }

    /**
     * Matches the string against the scheme ABNF.
     * @param string $scheme The string to match
     * @param array $matches Provides the matched sub sections from the match
     * @return bool True if the scheme matches, false if not
     */
    public function matchScheme($scheme, &$matches = [])
    {
        return $this->match(self::$scheme, $scheme, $matches);
    }

    /**
     * Matches the string against the host ABNF.
     * @param string $host The string to match
     * @param array $matches Provides the matched sub sections from the match
     * @return bool True if the host matches, false if not
     */
    public function matchHost($host, &$matches = [])
    {
        return $this->match(self::$host, $host, $matches);
    }

    /**
     * Matches the subject against the pattern and provides the literal sub patterns.
     *
     * The matches array is always reset to an empty array first, so it is
     * empty whenever the method returns false.
     *
     * @param string $pattern The pattern to use for matching
     * @param string $subject The subject to match
     * @param array $matches The provided list of literal sub patterns
     * @return bool True if the pattern matches, false if not
     */
    private function match($pattern, $subject, &$matches)
    {
        $matches = [];
        $subject = (string) $subject;

        // The patterns themselves always accept bytes x80-xFF, so forbidding
        // non ascii characters is done by rejecting such subjects up front.
        if (!$this->allowNonAscii && !preg_match('/^[\\x00-\\x7F]*$/', $subject)) {
            return false;
        }

        if (!preg_match($pattern, $subject, $match)) {
            return false;
        }

        $matches = $this->getNamedPatterns($match);

        return true;
    }

    /**
     * Returns nonempty named sub patterns from the match set.
     *
     * Numerically indexed entries and named entries that matched an empty
     * string are removed from the returned array.
     *
     * @param array $matches Sub pattern matches
     * @return array<string,string> Nonempty named sub pattern matches
     */
    private function getNamedPatterns($matches)
    {
        foreach ($matches as $key => $value) {
            if (!is_string($key) || strlen($value) < 1) {
                unset($matches[$key]);
            }
        }

        return $matches;
    }

    /**
     * Builds the PCRE patterns according to the ABNF definitions.
     *
     * The resulting patterns are stored in the static pattern properties of
     * the class. All patterns are anchored with `^` and `$` and use the `D`
     * modifier so that `$` matches only at the very end of the subject.
     */
    private static function buildPatterns()
    {
        $alpha = 'A-Za-z';
        $digit = '0-9';
        $hex = $digit . 'A-Fa-f';
        $unreserved = "$alpha$digit\\-._~";
        $delimiters = "!$&'()*+,;=";
        $utf8 = '\\x80-\\xFF';

        // The alternatives are ordered from longest to shortest, because the
        // octets are used inside an atomic group: once the last octet has
        // matched, the engine cannot backtrack to try a longer alternative.
        $octet = "(?:25[0-5]|2[0-4][$digit]|1[$digit]{2}|[1-9][$digit]|[$digit])";
        $ipv4address = "(?>$octet\\.$octet\\.$octet\\.$octet)";

        $encoded = "%[$hex]{2}";
        $h16 = "[$hex]{1,4}";
        $ls32 = "(?:$h16:$h16|$ipv4address)";

        $data = "[$unreserved$delimiters:@$utf8]++|$encoded";

        // Defining the scheme
        $scheme = "(?'scheme'(?>[$alpha][$alpha$digit+\\-.]*+))";

        // Defining the authority
        $ipv6address = "(?'IPv6address'" .
            "(?:(?:$h16:){6}$ls32)|" .
            "(?:::(?:$h16:){5}$ls32)|" .
            "(?:(?:$h16)?::(?:$h16:){4}$ls32)|" .
            "(?:(?:(?:$h16:){0,1}$h16)?::(?:$h16:){3}$ls32)|" .
            "(?:(?:(?:$h16:){0,2}$h16)?::(?:$h16:){2}$ls32)|" .
            "(?:(?:(?:$h16:){0,3}$h16)?::$h16:$ls32)|" .
            "(?:(?:(?:$h16:){0,4}$h16)?::$ls32)|" .
            "(?:(?:(?:$h16:){0,5}$h16)?::$h16)|" .
            "(?:(?:(?:$h16:){0,6}$h16)?::))";

        $regularName = "(?'reg_name'(?>(?:[$unreserved$delimiters$utf8]++|$encoded)*))";

        $ipvFuture = "(?'IPvFuture'v[$hex]++\\.[$unreserved$delimiters:]++)";
        $ipLiteral = "(?'IP_literal'\\[(?>$ipv6address|$ipvFuture)\\])";

        $port = "(?'port'(?>[$digit]*+))";
        // The IPv4 alternative precedes the registered name, so a valid dotted
        // quad is reported as IPv4address and anything else as reg_name.
        $host = "(?'host'$ipLiteral|(?'IPv4address'$ipv4address)|$regularName)";
        $userInfo = "(?'userinfo'(?>(?:[$unreserved$delimiters:$utf8]++|$encoded)*))";
        $authority = "(?'authority'(?:$userInfo@)?$host(?::$port)?)";

        // Defining the path
        $segment = "(?>(?:$data)*)";
        $segmentNotEmpty = "(?>(?:$data)+)";
        // Same as a non empty segment, except that the colon is not allowed
        $segmentNoScheme = "(?>(?:[$unreserved$delimiters@$utf8]++|$encoded)+)";

        $pathAbsoluteEmpty = "(?'path_abempty'(?:/$segment)*)";
        $pathAbsolute = "(?'path_absolute'/(?:$segmentNotEmpty(?:/$segment)*)?)";
        $pathNoScheme = "(?'path_noscheme'$segmentNoScheme(?:/$segment)*)";
        $pathRootless = "(?'path_rootless'$segmentNotEmpty(?:/$segment)*)";
        $pathEmpty = "(?'path_empty')";

        // Defining other parts
        $query = "(?'query'(?>(?:$data|[/?])*))";
        $fragment = "(?'fragment'(?>(?:$data|[/?])*))";

        $absolutePath = "(?'hier_part'//$authority$pathAbsoluteEmpty|$pathAbsolute|$pathRootless|$pathEmpty)";
        $relativePath = "(?'relative_part'//$authority$pathAbsoluteEmpty|$pathAbsolute|$pathNoScheme|$pathEmpty)";

        // Without the D modifier the dollar sign would also match before a
        // trailing newline, which would accept subjects such as "http\n".
        self::$absoluteUri = "#^$scheme:$absolutePath(?:\\?$query)?(?:\\#$fragment)?$#D";
        self::$relativeUri = "#^$relativePath(?:\\?$query)?(?:\\#$fragment)?$#D";
        self::$scheme = "#^$scheme$#D";
        self::$host = "#^$host$#D";
    }
}
223