Complex classes like UriString often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UriString, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 60 | final class UriString |
||
| 61 | { |
||
| 62 | /** |
||
| 63 | * Default URI component values. |
||
| 64 | */ |
||
| 65 | private const URI_COMPONENTS = [ |
||
| 66 | 'scheme' => null, 'user' => null, 'pass' => null, 'host' => null, |
||
| 67 | 'port' => null, 'path' => '', 'query' => null, 'fragment' => null, |
||
| 68 | ]; |
||
| 69 | |||
| 70 | /** |
||
| 71 | * Simple URIs which do not need any parsing. |
||
| 72 | */ |
||
| 73 | private const URI_SCHORTCUTS = [ |
||
| 74 | '' => [], |
||
| 75 | '#' => ['fragment' => ''], |
||
| 76 | '?' => ['query' => ''], |
||
| 77 | '?#' => ['query' => '', 'fragment' => ''], |
||
| 78 | '/' => ['path' => '/'], |
||
| 79 | '//' => ['host' => ''], |
||
| 80 | ]; |
||
| 81 | |||
| 82 | /** |
||
| 83 | * Range of invalid characters in URI string. |
||
| 84 | */ |
||
| 85 | private const REGEXP_INVALID_URI_CHARS = '/[\x00-\x1f\x7f]/'; |
||
| 86 | |||
| 87 | /** |
||
| 88 | * RFC3986 regular expression URI splitter. |
||
| 89 | * |
||
| 90 | * @link https://tools.ietf.org/html/rfc3986#appendix-B |
||
| 91 | */ |
||
| 92 | private const REGEXP_URI_PARTS = ',^ |
||
| 93 | (?<scheme>(?<scontent>[^:/?\#]+):)? # URI scheme component |
||
| 94 | (?<authority>//(?<acontent>[^/?\#]*))? # URI authority part |
||
| 95 | (?<path>[^?\#]*) # URI path component |
||
| 96 | (?<query>\?(?<qcontent>[^\#]*))? # URI query component |
||
| 97 | (?<fragment>\#(?<fcontent>.*))? # URI fragment component |
||
| 98 | ,x'; |
||
| 99 | |||
| 100 | /** |
||
| 101 | * URI scheme regular expression. |
||
| 102 | * |
||
| 103 | * @link https://tools.ietf.org/html/rfc3986#section-3.1 |
||
| 104 | */ |
||
| 105 | private const REGEXP_URI_SCHEME = '/^([a-z][a-z\d\+\.\-]*)?$/i'; |
||
| 106 | |||
| 107 | /** |
||
| 108 | * IPvFuture regular expression. |
||
| 109 | * |
||
| 110 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 111 | */ |
||
| 112 | private const REGEXP_IP_FUTURE = '/^ |
||
| 113 | v(?<version>[A-F0-9])+\. |
||
| 114 | (?: |
||
| 115 | (?<unreserved>[a-z0-9_~\-\.])| |
||
| 116 | (?<sub_delims>[!$&\'()*+,;=:]) # also include the : character |
||
| 117 | )+ |
||
| 118 | $/ix'; |
||
| 119 | |||
| 120 | /** |
||
| 121 | * General registered name regular expression. |
||
| 122 | * |
||
| 123 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 124 | */ |
||
| 125 | private const REGEXP_REGISTERED_NAME = '/(?(DEFINE) |
||
| 126 | (?<unreserved>[a-z0-9_~\-]) # . is missing as it is used to separate labels |
||
| 127 | (?<sub_delims>[!$&\'()*+,;=]) |
||
| 128 | (?<encoded>%[A-F0-9]{2}) |
||
| 129 | (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*) |
||
| 130 | ) |
||
| 131 | ^(?:(?&reg_name)\.)*(?&reg_name)\.?$/ix'; |
||
| 132 | |||
| 133 | /** |
||
| 134 | * Invalid characters in host regular expression. |
||
| 135 | * |
||
| 136 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 137 | */ |
||
| 138 | private const REGEXP_INVALID_HOST_CHARS = '/ |
||
| 139 | [:\/?#\[\]@ ] # gen-delims characters as well as the space character |
||
| 140 | /ix'; |
||
| 141 | |||
| 142 | /** |
||
| 143 | * Invalid path for URI without scheme and authority regular expression. |
||
| 144 | * |
||
| 145 | * @link https://tools.ietf.org/html/rfc3986#section-3.3 |
||
| 146 | */ |
||
| 147 | private const REGEXP_INVALID_PATH = ',^(([^/]*):)(.*)?/,'; |
||
| 148 | |||
| 149 | /** |
||
| 150 | * Host and Port splitter regular expression. |
||
| 151 | */ |
||
| 152 | private const REGEXP_HOST_PORT = ',^(?<host>\[.*\]|[^:]*)(:(?<port>.*))?$,'; |
||
| 153 | |||
| 154 | /** |
||
| 155 | * IDN Host detector regular expression. |
||
| 156 | */ |
||
| 157 | private const REGEXP_IDN_PATTERN = '/[^\x20-\x7f]/'; |
||
| 158 | |||
| 159 | /** |
||
| 160 | * Only the address block fe80::/10 can have a Zone ID attached to it, |
||
| 161 | * so let's detect the link-local significant 10 bits. |
||
| 162 | */ |
||
| 163 | private const ZONE_ID_ADDRESS_BLOCK = "\xfe\x80"; |
||
| 164 | |||
| 165 | /** |
||
| 166 | * Generate a URI string representation from its parsed representation |
||
| 167 | * returned by League\Uri\parse() or PHP's parse_url. |
||
| 168 | * |
||
| 169 | * If you supply your own array, you are responsible for providing |
||
| 170 | * valid components without their URI delimiters. |
||
| 171 | * |
||
| 172 | * @link https://tools.ietf.org/html/rfc3986#section-5.3 |
||
| 173 | * @link https://tools.ietf.org/html/rfc3986#section-7.5 |
||
| 174 | * |
||
| 175 | * @param array{ |
||
| 176 | * scheme:?string, |
||
| 177 | * user:?string, |
||
| 178 | * pass:?string, |
||
| 179 | * host:?string, |
||
| 180 | * port:?int, |
||
| 181 | * path:string, |
||
| 182 | * query:?string, |
||
| 183 | * fragment:?string |
||
| 184 | * } $components |
||
| 185 | */ |
||
| 186 | 94 | public static function build(array $components): string |
|
| 223 | |||
| 224 | /** |
||
| 225 | * Parse a URI string into its components. |
||
| 226 | * |
||
| 227 | * This method parses a URI and returns an associative array containing any |
||
| 228 | * of the various components of the URI that are present. |
||
| 229 | * |
||
| 230 | * <code> |
||
| 231 | * $components = (new Parser())->parse('http://foo@test.example.com:42?query#'); |
||
| 232 | * var_export($components); |
||
| 233 | * //will display |
||
| 234 | * array( |
||
| 235 | * 'scheme' => 'http', // the URI scheme component |
||
| 236 | * 'user' => 'foo', // the URI user component |
||
| 237 | * 'pass' => null, // the URI pass component |
||
| 238 | * 'host' => 'test.example.com', // the URI host component |
||
| 239 | * 'port' => 42, // the URI port component |
||
| 240 | * 'path' => '', // the URI path component |
||
| 241 | * 'query' => 'query', // the URI query component |
||
| 242 | * 'fragment' => '', // the URI fragment component |
||
| 243 | * ); |
||
| 244 | * </code> |
||
| 245 | * |
||
| 246 | * The returned array is similar to PHP's parse_url return value with the following |
||
| 247 | * differences: |
||
| 248 | * |
||
| 249 | * <ul> |
||
| 250 | * <li>All components are always present in the returned array</li> |
||
| 251 | * <li>Empty and undefined components are treated differently. An empty component is |
||
| 252 | * set to the empty string while an undefined component is set to the `null` value.</li> |
||
| 253 | * <li>The path component is never undefined</li> |
||
| 254 | * <li>The method parses the URI following the RFC3986 rules but you are still |
||
| 255 | * required to validate the returned components against its related scheme specific rules.</li> |
||
| 256 | * </ul> |
||
| 257 | * |
||
| 258 | * @link https://tools.ietf.org/html/rfc3986 |
||
| 259 | * |
||
| 260 | * @param mixed $uri any scalar or stringable object |
||
| 261 | * |
||
| 262 | * @throws SyntaxError if the URI contains invalid characters |
||
| 263 | * @throws SyntaxError if the URI contains an invalid scheme |
||
| 264 | * @throws SyntaxError if the URI contains an invalid path |
||
| 265 | * |
||
| 266 | * @return array{ |
||
|
|
|||
| 267 | * scheme:?string, |
||
| 268 | * user:?string, |
||
| 269 | * pass:?string, |
||
| 270 | * host:?string, |
||
| 271 | * port:?int, |
||
| 272 | * path:string, |
||
| 273 | * query:?string, |
||
| 274 | * fragment:?string |
||
| 275 | * } |
||
| 276 | */ |
||
| 277 | 536 | public static function parse($uri): array |
|
| 349 | |||
| 350 | /** |
||
| 351 | * Parses the URI authority part. |
||
| 352 | * |
||
| 353 | * @link https://tools.ietf.org/html/rfc3986#section-3.2 |
||
| 354 | * |
||
| 355 | * @throws SyntaxError If the port component is invalid |
||
| 356 | * |
||
| 357 | * @return array{user:?string, pass:?string, host:?string, port:?int} |
||
| 358 | */ |
||
| 359 | 386 | private static function parseAuthority(string $authority): array |
|
| 379 | |||
| 380 | /** |
||
| 381 | * Filter and format the port component. |
||
| 382 | * |
||
| 383 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 384 | * |
||
| 385 | * @throws SyntaxError if the port component is invalid |
||
| 386 | */ |
||
| 387 | 374 | private static function filterPort(string $port): ?int |
|
| 399 | |||
| 400 | /** |
||
| 401 | * Returns whether a hostname is valid. |
||
| 402 | * |
||
| 403 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 404 | * |
||
| 405 | * @throws SyntaxError if the registered name is invalid |
||
| 406 | */ |
||
| 407 | 362 | private static function filterHost(string $host): string |
|
| 423 | |||
| 424 | /** |
||
| 425 | * Returns whether the host is an IPv4 or a registered name. |
||
| 426 | * |
||
| 427 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 428 | * |
||
| 429 | * @throws SyntaxError if the registered name is invalid |
||
| 430 | * @throws IdnSupportMissing if IDN support or ICU requirement are not available or met. |
||
| 431 | */ |
||
| 432 | 324 | private static function filterRegisteredName(string $host): string |
|
| 500 | |||
| 501 | /** |
||
| 502 | * Retrieves and formats the IDNA conversion error message. |
||
| 503 | * |
||
| 504 | * @link http://icu-project.org/apiref/icu4j/com/ibm/icu/text/IDNA.Error.html |
||
| 505 | */ |
||
| 506 | 6 | private static function getIDNAErrors(int $error_byte): string |
|
| 536 | |||
| 537 | /** |
||
| 538 | * Validates an IPv6/IPvFuture host. |
||
| 539 | * |
||
| 540 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
| 541 | * @link https://tools.ietf.org/html/rfc6874#section-2 |
||
| 542 | * @link https://tools.ietf.org/html/rfc6874#section-4 |
||
| 543 | */ |
||
| 544 | 36 | private static function isIpHost(string $ip_host): bool |
|
| 567 | } |
||
| 568 |
This check marks PHPDoc comments that could not be parsed by our parser. To see which comment annotations we can parse, please refer to our documentation on supported doc-types.