1 | <?php |
||
2 | |||
3 | namespace BenTools\UriFactory; |
||
4 | |||
5 | use Psr\Http\Message\UriInterface; |
||
6 | |||
final class UriCanonicalizer
{
    /**
     * Runs every canonicalization step of this class on the given URI, in a fixed order.
     *
     * Percent-unescaping runs first, so escaped control characters in the host
     * and path are already decoded when removeUnwantedChars() strips them.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function canonicalize(UriInterface $uri): UriInterface
    {
        $uri = self::ensureIsPercentUnescaped($uri);
        $uri = self::ensureSchemeIsNotBlank($uri);
        $uri = self::removeUnwantedChars($uri);
        $uri = self::removeFragment($uri);
        $uri = self::removeLeadingAndTrailingDots($uri);
        $uri = self::replaceConsecutiveDotsWithASingleDot($uri);
        $uri = self::normalizeHostname($uri);
        $uri = self::normalizePath($uri);

        return $uri;
    }

    /**
     * Fully percent-unescapes the host and the path of the URI.
     *
     * Other components (user info, query, fragment) are left untouched.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function ensureIsPercentUnescaped(UriInterface $uri): UriInterface
    {
        return $uri
            ->withHost(self::percentUnescape($uri->getHost()))
            ->withPath(self::percentUnescape($uri->getPath()))
        ;
    }

    /**
     * Decodes percent-escapes repeatedly until the string no longer changes,
     * so multiply-encoded input such as "%2561" ends up as "a".
     *
     * @param string $string
     * @return string
     */
    public static function percentUnescape(string $string): string
    {
        // rawurldecode() only decodes %XX sequences. urldecode() would also
        // turn a literal "+" into a space, which is wrong for hosts and paths.
        while ($string !== ($decoded = rawurldecode($string))) {
            $string = $decoded;
        }

        return $string;
    }

    /**
     * Defaults the scheme to "http" when the URI has none.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function ensureSchemeIsNotBlank(UriInterface $uri): UriInterface
    {
        return '' === $uri->getScheme() ? $uri->withScheme('http') : $uri;
    }

    /**
     * Drops the fragment component.
     *
     * @param UriInterface $uri
     * @return UriInterface
     */
    public static function removeFragment(UriInterface $uri): UriInterface
    {
        return $uri->withFragment('');
    }

    /**
     * Strips tab, line feed, carriage return and vertical tab characters from
     * the user info, host, path, query and fragment.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function removeUnwantedChars(UriInterface $uri): UriInterface
    {
        $removeUnwantedChars = static function (?string $string) {
            if (null === $string) {
                return null;
            }

            // "\t" === "\x09", "\n" === "\x0A", "\r" === "\x0D"; "\x0B" is the vertical tab.
            return str_replace(["\t", "\n", "\r", "\x0B"], '', $string);
        };

        return $uri
            ->withUserInfo($removeUnwantedChars($uri->getUserInfo()))
            ->withHost($removeUnwantedChars($uri->getHost()))
            ->withPath($removeUnwantedChars($uri->getPath()))
            ->withQuery($removeUnwantedChars($uri->getQuery()))
            ->withFragment($removeUnwantedChars($uri->getFragment()));
    }

    /**
     * Trims every leading and trailing "." from the host.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function removeLeadingAndTrailingDots(UriInterface $uri): UriInterface
    {
        return $uri->withHost(
            trim($uri->getHost(), '.')
        );
    }

    /**
     * Collapses each run of two or more dots in the host into a single dot.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function replaceConsecutiveDotsWithASingleDot(UriInterface $uri): UriInterface
    {
        return $uri->withHost(
            preg_replace('/\.{2,}/', '.', $uri->getHost())
        );
    }

    /**
     * Lower-cases the host and, when it denotes an IPv4 address, rewrites it
     * in dotted-decimal form.
     *
     * Two IPv4 spellings are recognised: whatever ip2long() accepts, and a
     * plain decimal integer in the range 0..4294967295 (e.g. "3279880203").
     * Any other host is only lower-cased.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function normalizeHostname(UriInterface $uri): UriInterface
    {
        $hostname = strtolower($uri->getHost());

        // Only an all-digit host is a decimal-encoded address. is_numeric()
        // would also accept floats, exponents, signs and leading whitespace,
        // and long2ip() expects a real integer rather than a numeric string.
        if (1 === preg_match('/^[0-9]+$/D', $hostname)) {
            // NOTE(review): assumes 64-bit integers; on a 32-bit build the
            // cast saturates at PHP_INT_MAX - confirm if 32-bit must be supported.
            $number = (int) $hostname;
            // Values beyond 32 bits are not IPv4 addresses; keep them as host names.
            $hostnameIP = $number <= 0xFFFFFFFF ? $number : false;
        } else {
            $hostnameIP = ip2long($hostname);
        }

        if (false !== $hostnameIP) {
            $hostname = long2ip($hostnameIP);
        }

        return $uri->withHost($hostname);
    }

    /**
     * Resolves "." and ".." segments in the path, collapses repeated slashes
     * and guarantees that the path starts with "/".
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function normalizePath(UriInterface $uri): UriInterface
    {
        $path = $uri->getPath();
        $segments = explode('/', $path);
        $parts = [];
        foreach ($segments as $segment) {
            switch ($segment) {
                case '.':
                    // Current-directory segment: contributes nothing.
                    break;
                case '..':
                    // Parent-directory segment: discard the last kept segment
                    // (array_pop() is a no-op when nothing has been kept yet).
                    array_pop($parts);
                    break;
                default:
                    // Empty segments (from leading or doubled slashes) are kept
                    // here; the slash-collapsing step below absorbs them.
                    $parts[] = $segment;
                    break;
            }
        }
        $path = implode('/', $parts);
        $path = preg_replace('#/{2,}#', '/', $path);
        // Popping segments can remove the leading empty one, so restore the root slash.
        if (0 !== strpos($path, '/')) {
            $path = '/' . $path;
        }

        return $uri->withPath($path);
    }
}
||
166 |