Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Uri often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Uri, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | class Uri implements UriInterface |
||
14 | { |
||
15 | /** |
||
16 | * Absolute http and https URIs require a host per RFC 7230 Section 2.7 |
||
17 | * but in generic URIs the host can be empty. So for http(s) URIs |
||
18 | * we apply this default host when no host is given yet to form a |
||
19 | * valid URI. |
||
20 | */ |
||
21 | const HTTP_DEFAULT_HOST = 'localhost'; |
||
22 | |||
23 | private static $defaultPorts = [ |
||
24 | 'http' => 80, |
||
25 | 'https' => 443, |
||
26 | 'ftp' => 21, |
||
27 | 'gopher' => 70, |
||
28 | 'nntp' => 119, |
||
29 | 'news' => 119, |
||
30 | 'telnet' => 23, |
||
31 | 'tn3270' => 23, |
||
32 | 'imap' => 143, |
||
33 | 'pop' => 110, |
||
34 | 'ldap' => 389, |
||
35 | ]; |
||
36 | |||
37 | private static $charUnreserved = 'a-zA-Z0-9_\-\.~'; |
||
38 | private static $charSubDelims = '!\$&\'\(\)\*\+,;='; |
||
39 | private static $replaceQuery = ['=' => '%3D', '&' => '%26']; |
||
40 | |||
41 | /** @var string Uri scheme. */ |
||
42 | private $scheme = ''; |
||
43 | |||
44 | /** @var string Uri user info. */ |
||
45 | private $userInfo = ''; |
||
46 | |||
47 | /** @var string Uri host. */ |
||
48 | private $host = ''; |
||
49 | |||
50 | /** @var int|null Uri port. */ |
||
51 | private $port; |
||
52 | |||
53 | /** @var string Uri path. */ |
||
54 | private $path = ''; |
||
55 | |||
56 | /** @var string Uri query string. */ |
||
57 | private $query = ''; |
||
58 | |||
59 | /** @var string Uri fragment. */ |
||
60 | private $fragment = ''; |
||
61 | |||
62 | /** |
||
63 | * @param string $uri URI to parse |
||
64 | */ |
||
65 | public function __construct($uri = '') |
||
76 | |||
77 | public function __toString() |
||
87 | |||
88 | /** |
||
89 | * Composes a URI reference string from its various components. |
||
90 | * |
||
91 | * Usually this method does not need to be called manually but instead is used indirectly via |
||
92 | * `Psr\Http\Message\UriInterface::__toString`. |
||
93 | * |
||
94 | * PSR-7 UriInterface treats an empty component the same as a missing component as |
||
95 | * getQuery(), getFragment() etc. always return a string. This explains the slight |
||
96 | * difference to RFC 3986 Section 5.3. |
||
97 | * |
||
98 | * Another adjustment is that the authority separator is added even when the authority is missing/empty |
||
99 | * for the "file" scheme. This is because PHP stream functions like `file_get_contents` only work with |
||
100 | * `file:///myfile` but not with `file:/myfile` although they are equivalent according to RFC 3986. But |
||
101 | * `file:///` is the more common syntax for the file scheme anyway (Chrome for example redirects to |
||
102 | * that format). |
||
103 | * |
||
104 | * @param string $scheme |
||
105 | * @param string $authority |
||
106 | * @param string $path |
||
107 | * @param string $query |
||
108 | * @param string $fragment |
||
109 | * |
||
110 | * @return string |
||
111 | * |
||
112 | * @link https://tools.ietf.org/html/rfc3986#section-5.3 |
||
113 | */ |
||
public static function composeComponents($scheme, $authority, $path, $query, $fragment)
{
    // Loose comparisons against '' are intentional: a null component counts as "missing",
    // exactly like an empty string does.
    $head = ($scheme != '') ? $scheme . ':' : '';

    // The "file" scheme always receives the "//" separator, even when the authority is empty.
    if ($scheme === 'file' || $authority != '') {
        $head .= '//' . $authority;
    }

    // Query and fragment are only emitted, together with their delimiter, when non-empty.
    $tail = ($query != '') ? '?' . $query : '';
    if ($fragment != '') {
        $tail .= '#' . $fragment;
    }

    return $head . $path . $tail;
}
||
139 | |||
140 | /** |
||
141 | * Whether the URI has the default port of the current scheme. |
||
142 | * |
||
143 | * `Psr\Http\Message\UriInterface::getPort` may return null or the standard port. This method can be used |
||
144 | * independently of the implementation. |
||
145 | * |
||
146 | * @param UriInterface $uri |
||
147 | * |
||
148 | * @return bool |
||
149 | */ |
||
public static function isDefaultPort(UriInterface $uri)
{
    $port = $uri->getPort();

    // A missing port is always treated as the default one.
    if ($port === null) {
        return true;
    }

    // Otherwise the port must strictly equal the entry registered for the URI's scheme.
    $scheme = $uri->getScheme();

    return isset(self::$defaultPorts[$scheme]) && $port === self::$defaultPorts[$scheme];
}
||
155 | |||
156 | /** |
||
157 | * Whether the URI is absolute, i.e. it has a scheme. |
||
158 | * |
||
159 | * An instance of UriInterface can either be an absolute URI or a relative reference. This method returns true |
||
160 | * if it is the former. An absolute URI has a scheme. A relative reference is used to express a URI relative |
||
161 | * to another URI, the base URI. Relative references can be divided into several forms: |
||
162 | * - network-path references, e.g. '//example.com/path' |
||
163 | * - absolute-path references, e.g. '/path' |
||
164 | * - relative-path references, e.g. 'subpath' |
||
165 | * |
||
166 | * @param UriInterface $uri |
||
167 | * |
||
168 | * @return bool |
||
169 | * @see Uri::isNetworkPathReference |
||
170 | * @see Uri::isAbsolutePathReference |
||
171 | * @see Uri::isRelativePathReference |
||
172 | * @link https://tools.ietf.org/html/rfc3986#section-4 |
||
173 | */ |
||
public static function isAbsolute(UriInterface $uri)
{
    // The URI is absolute exactly when its scheme component is non-empty.
    $scheme = $uri->getScheme();

    return $scheme !== '';
}
||
178 | |||
179 | /** |
||
180 | * Whether the URI is a network-path reference. |
||
181 | * |
||
182 | * A relative reference that begins with two slash characters is termed a network-path reference. |
||
183 | * |
||
184 | * @param UriInterface $uri |
||
185 | * |
||
186 | * @return bool |
||
187 | * @link https://tools.ietf.org/html/rfc3986#section-4.2 |
||
188 | */ |
||
public static function isNetworkPathReference(UriInterface $uri)
{
    // Any scheme makes this an absolute URI rather than a relative reference.
    if ($uri->getScheme() !== '') {
        return false;
    }

    // Without a scheme, a non-empty authority is what marks a network-path reference.
    return $uri->getAuthority() !== '';
}
||
193 | |||
194 | /** |
||
195 | * Whether the URI is an absolute-path reference. |
||
196 | * |
||
197 | * A relative reference that begins with a single slash character is termed an absolute-path reference. |
||
198 | * |
||
199 | * @param UriInterface $uri |
||
200 | * |
||
201 | * @return bool |
||
202 | * @link https://tools.ietf.org/html/rfc3986#section-4.2 |
||
203 | */ |
||
public static function isAbsolutePathReference(UriInterface $uri)
{
    // Both the scheme and the authority must be empty for this kind of reference.
    if ($uri->getScheme() !== '' || $uri->getAuthority() !== '') {
        return false;
    }

    // The path has to exist and start with a single slash.
    $path = $uri->getPath();

    return isset($path[0]) && $path[0] === '/';
}
||
211 | |||
212 | /** |
||
213 | * Whether the URI is a relative-path reference. |
||
214 | * |
||
215 | * A relative reference that does not begin with a slash character is termed a relative-path reference. |
||
216 | * |
||
217 | * @param UriInterface $uri |
||
218 | * |
||
219 | * @return bool |
||
220 | * @link https://tools.ietf.org/html/rfc3986#section-4.2 |
||
221 | */ |
||
public static function isRelativePathReference(UriInterface $uri)
{
    // Both the scheme and the authority must be empty for this kind of reference.
    if ($uri->getScheme() !== '' || $uri->getAuthority() !== '') {
        return false;
    }

    // An empty path qualifies, as does any path whose first character is not a slash.
    $path = $uri->getPath();

    return !(isset($path[0]) && $path[0] === '/');
}
||
228 | |||
229 | /** |
||
230 | * Whether the URI is a same-document reference. |
||
231 | * |
||
232 | * A same-document reference refers to a URI that is, aside from its fragment |
||
233 | * component, identical to the base URI. When no base URI is given, only an empty |
||
234 | * URI reference (apart from its fragment) is considered a same-document reference. |
||
235 | * |
||
236 | * @param UriInterface $uri The URI to check |
||
237 | * @param UriInterface|null $base An optional base URI to compare against |
||
238 | * |
||
239 | * @return bool |
||
240 | * @link https://tools.ietf.org/html/rfc3986#section-4.4 |
||
241 | */ |
||
public static function isSameDocumentReference(UriInterface $uri, UriInterface $base = null)
{
    // No base URI: only a reference that is empty apart from its fragment qualifies.
    if ($base === null) {
        return $uri->getScheme() === ''
            && $uri->getAuthority() === ''
            && $uri->getPath() === ''
            && $uri->getQuery() === '';
    }

    // With a base URI: resolve the reference first, then compare every component
    // except the fragment, in the order scheme, authority, path, query.
    $resolved = UriResolver::resolve($base, $uri);

    if ($resolved->getScheme() !== $base->getScheme()) {
        return false;
    }

    if ($resolved->getAuthority() !== $base->getAuthority()) {
        return false;
    }

    if ($resolved->getPath() !== $base->getPath()) {
        return false;
    }

    return $resolved->getQuery() === $base->getQuery();
}
||
255 | |||
256 | /** |
||
257 | * Removes dot segments from a path and returns the new path. |
||
258 | * |
||
259 | * @param string $path |
||
260 | * |
||
261 | * @return string |
||
262 | * |
||
263 | * @deprecated since version 1.4. Use UriResolver::removeDotSegments instead. |
||
264 | * @see UriResolver::removeDotSegments |
||
265 | */ |
||
266 | public static function removeDotSegments($path) |
||
270 | |||
271 | /** |
||
272 | * Converts the relative URI into a new URI that is resolved against the base URI. |
||
273 | * |
||
274 | * @param UriInterface $base Base URI |
||
275 | * @param string|UriInterface $rel Relative URI |
||
276 | * |
||
277 | * @return UriInterface |
||
278 | * |
||
279 | * @deprecated since version 1.4. Use UriResolver::resolve instead. |
||
280 | * @see UriResolver::resolve |
||
281 | */ |
||
282 | public static function resolve(UriInterface $base, $rel) |
||
290 | |||
291 | /** |
||
292 | * Creates a new URI with a specific query string value removed. |
||
293 | * |
||
294 | * Any existing query string values that exactly match the provided key are |
||
295 | * removed. |
||
296 | * |
||
297 | * @param UriInterface $uri URI to use as a base. |
||
298 | * @param string $key Query string key to remove. |
||
299 | * |
||
300 | * @return UriInterface |
||
301 | */ |
||
302 | public static function withoutQueryValue(UriInterface $uri, $key) |
||
316 | |||
317 | /** |
||
318 | * Creates a new URI with a specific query string value. |
||
319 | * |
||
320 | * Any existing query string values that exactly match the provided key are |
||
321 | * removed and replaced with the given key value pair. |
||
322 | * |
||
323 | * A value of null will set the query string key without a value, e.g. "key" |
||
324 | * instead of "key=value". |
||
325 | * |
||
326 | * @param UriInterface $uri URI to use as a base. |
||
327 | * @param string $key Key to set. |
||
328 | * @param string|null $value Value to set |
||
329 | * |
||
330 | * @return UriInterface |
||
331 | */ |
||
332 | public static function withQueryValue(UriInterface $uri, $key, $value) |
||
358 | |||
359 | /** |
||
360 | * Creates a URI from a hash of `parse_url` components. |
||
361 | * |
||
362 | * @param array $parts |
||
363 | * |
||
364 | * @return UriInterface |
||
365 | * @link http://php.net/manual/en/function.parse-url.php |
||
366 | * |
||
367 | * @throws \InvalidArgumentException If the components do not form a valid URI. |
||
368 | */ |
||
369 | public static function fromParts(array $parts) |
||
377 | |||
378 | public function getScheme() |
||
382 | |||
383 | public function getAuthority() |
||
396 | |||
397 | public function getUserInfo() |
||
401 | |||
402 | public function getHost() |
||
406 | |||
407 | public function getPort() |
||
411 | |||
412 | public function getPath() |
||
416 | |||
417 | public function getQuery() |
||
421 | |||
422 | public function getFragment() |
||
426 | |||
427 | public function withScheme($scheme) |
||
442 | |||
443 | public function withUserInfo($user, $password = null) |
||
460 | |||
461 | public function withHost($host) |
||
475 | |||
476 | public function withPort($port) |
||
491 | |||
492 | public function withPath($path) |
||
506 | |||
507 | public function withQuery($query) |
||
520 | |||
521 | public function withFragment($fragment) |
||
534 | |||
535 | /** |
||
536 | * Apply parse_url parts to a URI. |
||
537 | * |
||
538 | * @param array $parts Array of parse_url parts to apply. |
||
539 | */ |
||
540 | private function applyParts(array $parts) |
||
567 | |||
568 | /** |
||
569 | * @param string $scheme |
||
570 | * |
||
571 | * @return string |
||
572 | * |
||
573 | * @throws \InvalidArgumentException If the scheme is invalid. |
||
574 | */ |
||
575 | private function filterScheme($scheme) |
||
583 | |||
584 | /** |
||
585 | * @param string $host |
||
586 | * |
||
587 | * @return string |
||
588 | * |
||
589 | * @throws \InvalidArgumentException If the host is invalid. |
||
590 | */ |
||
private function filterHost($host)
{
    // Reject anything that is not a string instead of silently coercing it.
    if (!is_string($host)) {
        throw new \InvalidArgumentException('Host must be a string');
    }

    // Lower-case ASCII letters only. Before PHP 8.2, strtolower() honours the current locale
    // and may rewrite bytes >= 0x80 under single-byte locales, which corrupts multi-byte
    // UTF-8 host names. strtr() with an explicit A-Z map is locale-independent.
    return strtr($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
}
||
599 | |||
600 | /** |
||
601 | * @param int|null $port |
||
602 | * |
||
603 | * @return int|null |
||
604 | * |
||
605 | * @throws \InvalidArgumentException If the port is invalid. |
||
606 | */ |
||
607 | private function filterPort($port) |
||
622 | |||
private function removeDefaultPort()
{
    // Nothing to strip when no explicit port is stored.
    if ($this->port === null) {
        return;
    }

    // Clear the stored port when it is the default one for the current scheme.
    if (self::isDefaultPort($this)) {
        $this->port = null;
    }
}
||
629 | |||
630 | /** |
||
631 | * Filters the path of a URI |
||
632 | * |
||
633 | * @param string $path |
||
634 | * |
||
635 | * @return string |
||
636 | * |
||
637 | * @throws \InvalidArgumentException If the path is invalid. |
||
638 | */ |
||
639 | View Code Duplication | private function filterPath($path) |
|
651 | |||
652 | /** |
||
653 | * Filters the query string or fragment of a URI. |
||
654 | * |
||
655 | * @param string $str |
||
656 | * |
||
657 | * @return string |
||
658 | * |
||
659 | * @throws \InvalidArgumentException If the query or fragment is invalid. |
||
660 | */ |
||
661 | View Code Duplication | private function filterQueryAndFragment($str) |
|
673 | |||
674 | private function rawurlencodeMatchZero(array $match) |
||
678 | |||
679 | private function validateState() |
||
702 | } |
||
703 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.