Complex classes like Uri often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Uri, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 8 | class Uri implements UriInterface |
||
| 9 | { |
||
| 10 | use SchemePortsTrait; |
||
| 11 | |||
| 12 | protected $uri_parts = array( |
||
| 13 | "scheme" => "", |
||
| 14 | "hier_part" => "", |
||
| 15 | "authority" => "", |
||
| 16 | "user_info" => "", |
||
| 17 | "host" => "", |
||
| 18 | "port" => null, |
||
| 19 | "path" => "", |
||
| 20 | "query" => "", |
||
| 21 | "fragment" => "", |
||
| 22 | ); |
||
| 23 | |||
| 24 | protected $sub_delims = array( |
||
| 25 | "!", |
||
| 26 | "$", |
||
| 27 | "&", |
||
| 28 | "'", |
||
| 29 | "(", |
||
| 30 | ")", |
||
| 31 | "*", |
||
| 32 | "+", |
||
| 33 | ",", |
||
| 34 | ";", |
||
| 35 | "=", |
||
| 36 | ); |
||
| 37 | |||
| 38 | protected $pchar_unencoded = array( |
||
| 39 | ":", |
||
| 40 | "@", |
||
| 41 | ); |
||
| 42 | |||
| 43 | /** |
||
| 44 | * Uri constructor. Accepts a string representing a URI and parses the string into the URI's component parts. |
||
| 45 | * |
||
| 46 | * @throws \InvalidArgumentException Throws an \InvalidArgumentException when its parameter is not a string |
||
| 47 | * @param string $uri |
||
| 48 | */ |
||
| 49 | 81 | public function __construct($uri) |
|
| 57 | |||
| 58 | /** |
||
| 59 | * @todo Add a distinct test for this outside of constructor test |
||
|
|
|||
| 60 | * Retrieve the parsed components of the URI string. |
||
| 61 | * |
||
| 62 | * If the class was provided an invalid URI string, URI components will be empty strings, except port, which will |
||
| 63 | * be null |
||
| 64 | * |
||
| 65 | * @return mixed[] |
||
| 66 | */ |
||
| 67 | 34 | public function getParsedUri() |
|
| 71 | |||
| 72 | /** |
||
| 73 | * Retrieve the scheme component of the URI. |
||
| 74 | * |
||
| 75 | * If no scheme is present, this method MUST return an empty string. |
||
| 76 | * |
||
| 77 | * The value returned MUST be normalized to lowercase, per RFC 3986 |
||
| 78 | * Section 3.1. |
||
| 79 | * |
||
| 80 | * The trailing ":" character is not part of the scheme and MUST NOT be |
||
| 81 | * added. |
||
| 82 | * |
||
| 83 | * @see https://tools.ietf.org/html/rfc3986#section-3.1 |
||
| 84 | * @return string The URI scheme. |
||
| 85 | */ |
||
| 86 | 3 | public function getScheme() |
|
| 90 | |||
| 91 | /** |
||
| 92 | * Retrieve the authority component of the URI. |
||
| 93 | * |
||
| 94 | * If no authority information is present, this method MUST return an empty |
||
| 95 | * string. |
||
| 96 | * |
||
| 97 | * The authority syntax of the URI is: |
||
| 98 | * |
||
| 99 | * <pre> |
||
| 100 | * [user-info@]host[:port] |
||
| 101 | * </pre> |
||
| 102 | * |
||
| 103 | * If the port component is not set or is the standard port for the current |
||
| 104 | * scheme, it SHOULD NOT be included. |
||
| 105 | * |
||
| 106 | * @see https://tools.ietf.org/html/rfc3986#section-3.2 |
||
| 107 | * @return string The URI authority, in "[user-info@]host[:port]" format. |
||
| 108 | */ |
||
| 109 | 4 | public function getAuthority() |
|
| 125 | |||
| 126 | /** |
||
| 127 | * Retrieve the user information component of the URI. |
||
| 128 | * |
||
| 129 | * If no user information is present, this method MUST return an empty |
||
| 130 | * string. |
||
| 131 | * |
||
| 132 | * If a user is present in the URI, this will return that value; |
||
| 133 | * additionally, if the password is also present, it will be appended to the |
||
| 134 | * user value, with a colon (":") separating the values. |
||
| 135 | * |
||
| 136 | * The trailing "@" character is not part of the user information and MUST |
||
| 137 | * NOT be added. |
||
| 138 | * |
||
| 139 | * @return string The URI user information, in "username[:password]" format. |
||
| 140 | */ |
||
| 141 | 2 | public function getUserInfo() |
|
| 145 | |||
| 146 | /** |
||
| 147 | * Retrieve the host component of the URI. |
||
| 148 | * |
||
| 149 | * If no host is present, this method MUST return an empty string. |
||
| 150 | * |
||
| 151 | * The value returned MUST be normalized to lowercase, per RFC 3986 |
||
| 152 | * Section 3.2.2. |
||
| 153 | * |
||
| 154 | * @see http://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 155 | * @return string The URI host. |
||
| 156 | */ |
||
| 157 | 3 | public function getHost() |
|
| 161 | |||
| 162 | /** |
||
| 163 | * Retrieve the port component of the URI. |
||
| 164 | * |
||
| 165 | * If a port is present, and it is non-standard for the current scheme, |
||
| 166 | * this method MUST return it as an integer. If the port is the standard port |
||
| 167 | * used with the current scheme, this method SHOULD return null. |
||
| 168 | * |
||
| 169 | * If no port is present, and no scheme is present, this method MUST return |
||
| 170 | * a null value. |
||
| 171 | * |
||
| 172 | * If no port is present, but a scheme is present, this method MAY return |
||
| 173 | * the standard port for that scheme, but SHOULD return null. |
||
| 174 | * |
||
| 175 | * @return null|int The URI port. |
||
| 176 | */ |
||
| 177 | 4 | public function getPort() |
|
| 183 | |||
| 184 | /** |
||
| 185 | * Retrieve the path component of the URI. |
||
| 186 | * |
||
| 187 | * The path can either be empty or absolute (starting with a slash) or |
||
| 188 | * rootless (not starting with a slash). Implementations MUST support all |
||
| 189 | * three syntaxes. |
||
| 190 | * |
||
| 191 | * Normally, the empty path "" and absolute path "/" are considered equal as |
||
| 192 | * defined in RFC 7230 Section 2.7.3. But this method MUST NOT automatically |
||
| 193 | * do this normalization because in contexts with a trimmed base path, e.g. |
||
| 194 | * the front controller, this difference becomes significant. It's the task |
||
| 195 | * of the user to handle both "" and "/". |
||
| 196 | * |
||
| 197 | * The value returned MUST be percent-encoded, but MUST NOT double-encode |
||
| 198 | * any characters. To determine what characters to encode, please refer to |
||
| 199 | * RFC 3986, Sections 2 and 3.3. |
||
| 200 | * |
||
| 201 | * As an example, if the value should include a slash ("/") not intended as |
||
| 202 | * delimiter between path segments, that value MUST be passed in encoded |
||
| 203 | * form (e.g., "%2F") to the instance. |
||
| 204 | * |
||
| 205 | * @see https://tools.ietf.org/html/rfc3986#section-2 |
||
| 206 | * @see https://tools.ietf.org/html/rfc3986#section-3.3 |
||
| 207 | * @return string The URI path. |
||
| 208 | */ |
||
| 209 | 8 | public function getPath() |
|
| 222 | |||
| 223 | /** |
||
| 224 | * Retrieve the query string of the URI. |
||
| 225 | * |
||
| 226 | * If no query string is present, this method MUST return an empty string. |
||
| 227 | * |
||
| 228 | * The leading "?" character is not part of the query and MUST NOT be |
||
| 229 | * added. |
||
| 230 | * |
||
| 231 | * The value returned MUST be percent-encoded, but MUST NOT double-encode |
||
| 232 | * any characters. To determine what characters to encode, please refer to |
||
| 233 | * RFC 3986, Sections 2 and 3.4. |
||
| 234 | * |
||
| 235 | * As an example, if a value in a key/value pair of the query string should |
||
| 236 | * include an ampersand ("&") not intended as a delimiter between values, |
||
| 237 | * that value MUST be passed in encoded form (e.g., "%26") to the instance. |
||
| 238 | * |
||
| 239 | * @see https://tools.ietf.org/html/rfc3986#section-2 |
||
| 240 | * @see https://tools.ietf.org/html/rfc3986#section-3.4 |
||
| 241 | * @return string The URI query string. |
||
| 242 | */ |
||
| 243 | 4 | public function getQuery() |
|
| 251 | |||
| 252 | /** |
||
| 253 | * Retrieve the fragment component of the URI. |
||
| 254 | * |
||
| 255 | * If no fragment is present, this method MUST return an empty string. |
||
| 256 | * |
||
| 257 | * The leading "#" character is not part of the fragment and MUST NOT be |
||
| 258 | * added. |
||
| 259 | * |
||
| 260 | * The value returned MUST be percent-encoded, but MUST NOT double-encode |
||
| 261 | * any characters. To determine what characters to encode, please refer to |
||
| 262 | * RFC 3986, Sections 2 and 3.5. |
||
| 263 | * |
||
| 264 | * @see https://tools.ietf.org/html/rfc3986#section-2 |
||
| 265 | * @see https://tools.ietf.org/html/rfc3986#section-3.5 |
||
| 266 | * @return string The URI fragment. |
||
| 267 | */ |
||
| 268 | 4 | public function getFragment() |
|
| 276 | |||
| 277 | /** |
||
| 278 | * Return an instance with the specified scheme. |
||
| 279 | * |
||
| 280 | * This method MUST retain the state of the current instance, and return |
||
| 281 | * an instance that contains the specified scheme. |
||
| 282 | * |
||
| 283 | * Implementations MUST support the schemes "http" and "https" case |
||
| 284 | * insensitively, and MAY accommodate other schemes if required. |
||
| 285 | * |
||
| 286 | * An empty scheme is equivalent to removing the scheme. |
||
| 287 | * |
||
| 288 | * @param string $scheme The scheme to use with the new instance. |
||
| 289 | * @return static A new instance with the specified scheme. |
||
| 290 | * @throws \InvalidArgumentException for invalid or unsupported schemes. |
||
| 291 | */ |
||
| 292 | public function withScheme($scheme) |
||
| 296 | |||
| 297 | /** |
||
| 298 | * Return an instance with the specified authority. |
||
| 299 | * |
||
| 300 | * This method MUST retain the state of the current instance, and return |
||
| 301 | * an instance that contains the specified authority. |
||
| 302 | * |
||
| 303 | * Replacing the authority is equivalent to replacing or removing all authority components depending upon the |
||
| 304 | * composition of the authority. |
||
| 305 | * |
||
| 306 | * An empty authority is equivalent to removing the authority and all authority components. |
||
| 307 | * |
||
| 308 | * @param string $authority The authority to use with the new instance. |
||
| 309 | * @return static A new instance with the specified authority. |
||
| 310 | * @throws \InvalidArgumentException for invalid authorities. |
||
| 311 | */ |
||
| 312 | 22 | public function withAuthority($authority) |
|
| 329 | |||
| 330 | /** |
||
| 331 | * Return an instance with the specified user information. |
||
| 332 | * |
||
| 333 | * This method MUST retain the state of the current instance, and return |
||
| 334 | * an instance that contains the specified user information. |
||
| 335 | * |
||
| 336 | * Password is optional, but the user information MUST include the |
||
| 337 | * user; an empty string for the user is equivalent to removing user |
||
| 338 | * information. |
||
| 339 | * |
||
| 340 | * @param string $user The user name to use for authority. |
||
| 341 | * @param null|string $password The password associated with $user. |
||
| 342 | * @return static A new instance with the specified user information. |
||
| 343 | */ |
||
| 344 | public function withUserInfo($user, $password = null) |
||
| 348 | |||
| 349 | /** |
||
| 350 | * Return an instance with the specified host. |
||
| 351 | * |
||
| 352 | * This method MUST retain the state of the current instance, and return |
||
| 353 | * an instance that contains the specified host. |
||
| 354 | * |
||
| 355 | * An empty host value is equivalent to removing the host. |
||
| 356 | * |
||
| 357 | * @param string $host The hostname to use with the new instance. |
||
| 358 | * @return static A new instance with the specified host. |
||
| 359 | * @throws \InvalidArgumentException for invalid hostnames. |
||
| 360 | */ |
||
| 361 | public function withHost($host) |
||
| 365 | |||
| 366 | /** |
||
| 367 | * Return an instance with the specified port. |
||
| 368 | * |
||
| 369 | * This method MUST retain the state of the current instance, and return |
||
| 370 | * an instance that contains the specified port. |
||
| 371 | * |
||
| 372 | * Implementations MUST raise an exception for ports outside the |
||
| 373 | * established TCP and UDP port ranges. |
||
| 374 | * |
||
| 375 | * A null value provided for the port is equivalent to removing the port |
||
| 376 | * information. |
||
| 377 | * |
||
| 378 | * @param null|int $port The port to use with the new instance; a null value |
||
| 379 | * removes the port information. |
||
| 380 | * @return static A new instance with the specified port. |
||
| 381 | * @throws \InvalidArgumentException for invalid ports. |
||
| 382 | */ |
||
| 383 | public function withPort($port) |
||
| 387 | |||
| 388 | /** |
||
| 389 | * Return an instance with the specified path. |
||
| 390 | * |
||
| 391 | * This method MUST retain the state of the current instance, and return |
||
| 392 | * an instance that contains the specified path. |
||
| 393 | * |
||
| 394 | * The path can either be empty or absolute (starting with a slash) or |
||
| 395 | * rootless (not starting with a slash). Implementations MUST support all |
||
| 396 | * three syntaxes. |
||
| 397 | * |
||
| 398 | * If the path is intended to be domain-relative rather than path relative then |
||
| 399 | * it must begin with a slash ("/"). Paths not starting with a slash ("/") |
||
| 400 | * are assumed to be relative to some base path known to the application or |
||
| 401 | * consumer. |
||
| 402 | * |
||
| 403 | * Users can provide both encoded and decoded path characters. |
||
| 404 | * Implementations ensure the correct encoding as outlined in getPath(). |
||
| 405 | * |
||
| 406 | * @param string $path The path to use with the new instance. |
||
| 407 | * @return static A new instance with the specified path. |
||
| 408 | * @throws \InvalidArgumentException for invalid paths. |
||
| 409 | */ |
||
| 410 | 18 | public function withPath($path) |
|
| 430 | |||
| 431 | /** |
||
| 432 | * Return an instance with the specified query string. |
||
| 433 | * |
||
| 434 | * This method MUST retain the state of the current instance, and return |
||
| 435 | * an instance that contains the specified query string. |
||
| 436 | * |
||
| 437 | * Users can provide both encoded and decoded query characters. |
||
| 438 | * Implementations ensure the correct encoding as outlined in getQuery(). |
||
| 439 | * |
||
| 440 | * An empty query string value is equivalent to removing the query string. |
||
| 441 | * |
||
| 442 | * @param string $query The query string to use with the new instance. |
||
| 443 | * @return static A new instance with the specified query string. |
||
| 444 | * @throws \InvalidArgumentException for invalid query strings. |
||
| 445 | */ |
||
| 446 | public function withQuery($query) |
||
| 450 | |||
| 451 | /** |
||
| 452 | * Return an instance with the specified URI fragment. |
||
| 453 | * |
||
| 454 | * This method MUST retain the state of the current instance, and return |
||
| 455 | * an instance that contains the specified URI fragment. |
||
| 456 | * |
||
| 457 | * Users can provide both encoded and decoded fragment characters. |
||
| 458 | * Implementations ensure the correct encoding as outlined in getFragment(). |
||
| 459 | * |
||
| 460 | * An empty fragment value is equivalent to removing the fragment. |
||
| 461 | * |
||
| 462 | * @param string $fragment The fragment to use with the new instance. |
||
| 463 | * @return static A new instance with the specified fragment. |
||
| 464 | */ |
||
| 465 | public function withFragment($fragment) |
||
| 469 | |||
| 470 | /** |
||
| 471 | * Return the string representation as a URI reference. |
||
| 472 | * |
||
| 473 | * Depending on which components of the URI are present, the resulting |
||
| 474 | * string is either a full URI or relative reference according to RFC 3986, |
||
| 475 | * Section 4.1. The method concatenates the various components of the URI, |
||
| 476 | * using the appropriate delimiters: |
||
| 477 | * |
||
| 478 | * - If a scheme is present, it MUST be suffixed by ":". |
||
| 479 | * - If an authority is present, it MUST be prefixed by "//". |
||
| 480 | * - The path can be concatenated without delimiters. But there are two |
||
| 481 | * cases where the path has to be adjusted to make the URI reference |
||
| 482 | * valid as PHP does not allow to throw an exception in __toString(): |
||
| 483 | * - If the path is rootless and an authority is present, the path MUST |
||
| 484 | * be prefixed by "/". |
||
| 485 | * - If the path is starting with more than one "/" and no authority is |
||
| 486 | * present, the starting slashes MUST be reduced to one. |
||
| 487 | * - If a query is present, it MUST be prefixed by "?". |
||
| 488 | * - If a fragment is present, it MUST be prefixed by "#". |
||
| 489 | * |
||
| 490 | * @see http://tools.ietf.org/html/rfc3986#section-4.1 |
||
| 491 | * @return string |
||
| 492 | */ |
||
| 493 | 8 | public function __toString() |
|
| 497 | |||
| 498 | /** |
||
| 499 | * Converts a given array of URI parts to a string according to the specification of the __toString magic method |
||
| 500 | * |
||
| 501 | * @see Uri::__toString |
||
| 502 | * |
||
| 503 | * @param array $uri_parts The URI parts to be combined into a string |
||
| 504 | * @return string The string combined from the array of URI parts |
||
| 505 | */ |
||
| 506 | 31 | private function toString(array $uri_parts) |
|
| 522 | |||
| 523 | /** |
||
| 524 | * Splits a string URI into its component parts, returning true if the URI string matches a valid URI's syntax |
||
| 525 | * and false if the URI string does not |
||
| 526 | * |
||
| 527 | * @param string $uri The URI string to be decomposed |
||
| 528 | * @return bool Returns true if the URI string matches a valid URI's syntax |
||
| 529 | * Returns false otherwise |
||
| 530 | */ |
||
| 531 | 75 | private function explodeUri($uri) |
|
| 552 | |||
| 553 | /** |
||
| 554 | * Splits URI hierarchy data into authority and path data. |
||
| 555 | * |
||
| 556 | * @param string $hier_part The hierarchy part of a URI to be decomposed |
||
| 557 | * @return void |
||
| 558 | */ |
||
| 559 | 75 | private function explodeHierParts($hier_part) |
|
| 580 | |||
| 581 | /** |
||
| 582 | * Splits URI authority data into user info, host, and port data, returning an array with named keys. |
||
| 583 | * |
||
| 584 | * For the host component, it will capture everything within brackets to support ipv6 or match all characters until |
||
| 585 | * it finds a colon indicating the start of the port component. |
||
| 586 | * |
||
| 587 | * @param string $authority The authority part of a URI to be decomposed |
||
| 588 | * @return mixed[] An array with named keys containing the component parts of the supplied |
||
| 589 | * authority |
||
| 590 | */ |
||
| 591 | 74 | private function explodeAuthority($authority) |
|
| 609 | |||
| 610 | /** |
||
| 611 | * Normalizes a port string based on whether the URI's port is standard for its scheme |
||
| 612 | * |
||
| 613 | * @return int|null Returns null if the port is standard for the scheme |
||
| 614 | * Returns the port prepended with a colon if the port is not standard for the scheme |
||
| 615 | */ |
||
| 616 | 8 | private function normalizePort() |
|
| 630 | |||
| 631 | /** |
||
| 632 | * Sanitizes the URI component array by removing redundant key/value pairs |
||
| 633 | * |
||
| 634 | * @return void |
||
| 635 | */ |
||
| 636 | 75 | private function sanitizeUriPartsArray() |
|
| 642 | |||
| 643 | /** |
||
| 644 | * Percent encodes a component string except for sub-delims and unencoded pchar characters as defined by RFC 3986 |
||
| 645 | * in addition to any component-specific unencoded characters |
||
| 646 | * |
||
| 647 | * @param string $component_string The string representing a URI component |
||
| 648 | * @param string[] $component_unencoded [OPTIONAL] Any additional unencoded characters specific to the component |
||
| 649 | * |
||
| 650 | * @return string The string with appropriate characters percent-encoded |
||
| 651 | */ |
||
| 652 | 12 | private function encodeComponent($component_string, array $component_unencoded = array()) |
|
| 662 | |||
| 663 | /** |
||
| 664 | * Determines whether a string contains unallowed URI characters, provided a string of allowed characters for a |
||
| 665 | * given component. |
||
| 666 | * |
||
| 667 | * Note that a percent-encoded character (e.g. %20 for space) automatically counts as an allowed character, whereas |
||
| 668 | * a percent sign not followed by two hex digits (e.g. %2X) does not count as an allowed character. |
||
| 669 | * |
||
| 670 | * @param string $string The string to be checked for unallowed characters |
||
| 671 | * @param string $allowed A string containing all allowed characters for a given component |
||
| 672 | * |
||
| 673 | * @return bool Returns true if the string contains unallowed characters |
||
| 674 | * Returns false if the string contains only allowed characters (including percent-encoded |
||
| 675 | * characters) |
||
| 676 | */ |
||
| 677 | 8 | private function containsUnallowedUriCharacters($string, $allowed) |
|
| 687 | |||
| 688 | /** |
||
| 689 | * Returns the appropriate scheme string based upon __toString specification rules. |
||
| 690 | * |
||
| 691 | * @see Uri::__toString() |
||
| 692 | * |
||
| 693 | * @param string $scheme The scheme to compile into a URI-friendly string |
||
| 694 | * |
||
| 695 | * @return string The URI-friendly scheme string |
||
| 696 | */ |
||
| 697 | 31 | private function schemeToString($scheme) |
|
| 707 | |||
| 708 | /** |
||
| 709 | * Returns the appropriate authority string based upon __toString specification rules. |
||
| 710 | * |
||
| 711 | * @see Uri::__toString() |
||
| 712 | * |
||
| 713 | * @param string $authority The authority to compile into a URI-friendly string |
||
| 714 | * |
||
| 715 | * @return string The URI-friendly authority string |
||
| 716 | */ |
||
| 717 | 31 | private function authorityToString($authority) |
|
| 727 | |||
| 728 | /** |
||
| 729 | * Returns the appropriate path string based upon __toString specification rules. |
||
| 730 | * |
||
| 731 | * @see Uri::__toString() |
||
| 732 | * |
||
| 733 | * @param string $path The path to compile into a URI-friendly string |
||
| 734 | * @param string $authority [optional] The authority of the URI |
||
| 735 | * |
||
| 736 | * @return string The URI-friendly path string |
||
| 737 | */ |
||
| 738 | 31 | private function pathToString($path, $authority = "") |
|
| 757 | |||
| 758 | /** |
||
| 759 | * Returns the appropriate query string based upon __toString specification rules. |
||
| 760 | * |
||
| 761 | * @see Uri::__toString() |
||
| 762 | * |
||
| 763 | * @param string $query The query to compile into a URI-friendly string |
||
| 764 | * |
||
| 765 | * @return string The URI-friendly query string |
||
| 766 | */ |
||
| 767 | 31 | private function queryToString($query) |
|
| 777 | |||
| 778 | /** |
||
| 779 | * Returns the appropriate fragment string based upon __toString specification rules. |
||
| 780 | * |
||
| 781 | * @see Uri::__toString() |
||
| 782 | * |
||
| 783 | * @param string $fragment The fragment to compile into a URI-friendly string |
||
| 784 | * |
||
| 785 | * @return string The URI-friendly fragment string |
||
| 786 | */ |
||
| 787 | 31 | private function fragmentToString($fragment) |
|
| 797 | } |
||
| 798 |
This check looks for ``TODO`` comments that have been left in the code.
``TODO``s show that something is left unfinished and should be attended to.