Complex classes like UriString often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UriString, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
60 | final class UriString |
||
61 | { |
||
62 | /** |
||
63 | * Default URI component values. |
||
64 | */ |
||
65 | private const URI_COMPONENTS = [ |
||
66 | 'scheme' => null, 'user' => null, 'pass' => null, 'host' => null, |
||
67 | 'port' => null, 'path' => '', 'query' => null, 'fragment' => null, |
||
68 | ]; |
||
69 | |||
70 | /** |
||
71 | * Simple URIs which do not need any parsing. |
||
72 | */ |
||
73 | private const URI_SCHORTCUTS = [ |
||
74 | '' => [], |
||
75 | '#' => ['fragment' => ''], |
||
76 | '?' => ['query' => ''], |
||
77 | '?#' => ['query' => '', 'fragment' => ''], |
||
78 | '/' => ['path' => '/'], |
||
79 | '//' => ['host' => ''], |
||
80 | ]; |
||
81 | |||
82 | /** |
||
83 | * Range of invalid characters in URI string. |
||
84 | */ |
||
85 | private const REGEXP_INVALID_URI_CHARS = '/[\x00-\x1f\x7f]/'; |
||
86 | |||
87 | /** |
||
88 | * RFC3986 regular expression URI splitter. |
||
89 | * |
||
90 | * @link https://tools.ietf.org/html/rfc3986#appendix-B |
||
91 | */ |
||
92 | private const REGEXP_URI_PARTS = ',^ |
||
93 | (?<scheme>(?<scontent>[^:/?\#]+):)? # URI scheme component |
||
94 | (?<authority>//(?<acontent>[^/?\#]*))? # URI authority part |
||
95 | (?<path>[^?\#]*) # URI path component |
||
96 | (?<query>\?(?<qcontent>[^\#]*))? # URI query component |
||
97 | (?<fragment>\#(?<fcontent>.*))? # URI fragment component |
||
98 | ,x'; |
||
99 | |||
100 | /** |
||
101 | * URI scheme regular expression. |
||
102 | * |
||
103 | * @link https://tools.ietf.org/html/rfc3986#section-3.1 |
||
104 | */ |
||
105 | private const REGEXP_URI_SCHEME = '/^([a-z][a-z\d\+\.\-]*)?$/i'; |
||
106 | |||
107 | /** |
||
108 | * IPvFuture regular expression. |
||
109 | * |
||
110 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
111 | */ |
||
112 | private const REGEXP_IP_FUTURE = '/^ |
||
113 | v(?<version>[A-F0-9])+\. |
||
114 | (?: |
||
115 | (?<unreserved>[a-z0-9_~\-\.])| |
||
116 | (?<sub_delims>[!$&\'()*+,;=:]) # also include the : character |
||
117 | )+ |
||
118 | $/ix'; |
||
119 | |||
120 | /** |
||
121 | * General registered name regular expression. |
||
122 | * |
||
123 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
124 | */ |
||
125 | private const REGEXP_REGISTERED_NAME = '/(?(DEFINE) |
||
126 | (?<unreserved>[a-z0-9_~\-]) # . is missing as it is used to separate labels |
||
127 | (?<sub_delims>[!$&\'()*+,;=]) |
||
128 | (?<encoded>%[A-F0-9]{2}) |
||
129 | (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*) |
||
130 | ) |
||
131 | ^(?:(?&reg_name)\.)*(?&reg_name)\.?$/ix'; |
||
132 | |||
133 | /** |
||
134 | * Invalid characters in host regular expression. |
||
135 | * |
||
136 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
137 | */ |
||
138 | private const REGEXP_INVALID_HOST_CHARS = '/ |
||
139 | [:\/?#\[\]@ ] # gen-delims characters as well as the space character |
||
140 | /ix'; |
||
141 | |||
142 | /** |
||
143 | * Invalid path for URI without scheme and authority regular expression. |
||
144 | * |
||
145 | * @link https://tools.ietf.org/html/rfc3986#section-3.3 |
||
146 | */ |
||
147 | private const REGEXP_INVALID_PATH = ',^(([^/]*):)(.*)?/,'; |
||
148 | |||
149 | /** |
||
150 | * Host and Port splitter regular expression. |
||
151 | */ |
||
152 | private const REGEXP_HOST_PORT = ',^(?<host>\[.*\]|[^:]*)(:(?<port>.*))?$,'; |
||
153 | |||
154 | /** |
||
155 | * IDN Host detector regular expression. |
||
156 | */ |
||
157 | private const REGEXP_IDN_PATTERN = '/[^\x20-\x7f]/'; |
||
158 | |||
159 | /** |
||
160 | * Only the address block fe80::/10 can have a Zone ID attached to it, |
||
161 | * so let's detect the link-local significant 10 bits. |
||
162 | */ |
||
163 | private const ZONE_ID_ADDRESS_BLOCK = "\xfe\x80"; |
||
164 | |||
165 | /** |
||
166 | * Generate an URI string representation from its parsed representation |
||
167 | * returned by League\Uri\parse() or PHP's parse_url. |
||
168 | * |
||
169 | * If you supply your own array, you are responsible for providing |
||
170 | * valid components without their URI delimiters. |
||
171 | * |
||
172 | * @link https://tools.ietf.org/html/rfc3986#section-5.3 |
||
173 | * @link https://tools.ietf.org/html/rfc3986#section-7.5 |
||
174 | * |
||
175 | * @param array{ |
||
176 | * scheme:?string, |
||
177 | * user:?string, |
||
178 | * pass:?string, |
||
179 | * host:?string, |
||
180 | * port:?int, |
||
181 | * path:string, |
||
182 | * query:?string, |
||
183 | * fragment:?string |
||
184 | * } $components |
||
185 | */ |
||
186 | 94 | public static function build(array $components): string |
|
223 | |||
224 | /** |
||
225 | * Parse an URI string into its components. |
||
226 | * |
||
227 | * This method parses a URI and returns an associative array containing any |
||
228 | * of the various components of the URI that are present. |
||
229 | * |
||
230 | * <code> |
||
231 | * $components = UriString::parse('http://foo@test.example.com:42?query#'); |
||
232 | * var_export($components); |
||
233 | * //will display |
||
234 | * array( |
||
235 | * 'scheme' => 'http', // the URI scheme component |
||
236 | * 'user' => 'foo', // the URI user component |
||
237 | * 'pass' => null, // the URI pass component |
||
238 | * 'host' => 'test.example.com', // the URI host component |
||
239 | * 'port' => 42, // the URI port component |
||
240 | * 'path' => '', // the URI path component |
||
241 | * 'query' => 'query', // the URI query component |
||
242 | * 'fragment' => '', // the URI fragment component |
||
243 | * ); |
||
244 | * </code> |
||
245 | * |
||
246 | * The returned array is similar to PHP's parse_url return value with the following |
||
247 | * differences: |
||
248 | * |
||
249 | * <ul> |
||
250 | * <li>All components are always present in the returned array</li> |
||
251 | * <li>Empty and undefined components are treated differently. An empty component is |
||
252 | * set to the empty string while an undefined component is set to the `null` value.</li> |
||
253 | * <li>The path component is never undefined</li> |
||
254 | * <li>The method parses the URI following the RFC3986 rules but you are still |
||
255 | * required to validate the returned components against its related scheme specific rules.</li> |
||
256 | * </ul> |
||
257 | * |
||
258 | * @link https://tools.ietf.org/html/rfc3986 |
||
259 | * |
||
260 | * @param mixed $uri any scalar or stringable object |
||
261 | * |
||
262 | * @throws SyntaxError if the URI contains invalid characters |
||
263 | * @throws SyntaxError if the URI contains an invalid scheme |
||
264 | * @throws SyntaxError if the URI contains an invalid path |
||
265 | * |
||
266 | * @return array{ |
||
|
|||
267 | * scheme:?string, |
||
268 | * user:?string, |
||
269 | * pass:?string, |
||
270 | * host:?string, |
||
271 | * port:?int, |
||
272 | * path:string, |
||
273 | * query:?string, |
||
274 | * fragment:?string |
||
275 | * } |
||
276 | */ |
||
277 | 536 | public static function parse($uri): array |
|
349 | |||
350 | /** |
||
351 | * Parses the URI authority part. |
||
352 | * |
||
353 | * @link https://tools.ietf.org/html/rfc3986#section-3.2 |
||
354 | * |
||
355 | * @throws SyntaxError If the port component is invalid |
||
356 | * |
||
357 | * @return array{user:?string, pass:?string, host:?string, port:?int} |
||
358 | */ |
||
359 | 386 | private static function parseAuthority(string $authority): array |
|
379 | |||
380 | /** |
||
381 | * Filter and format the port component. |
||
382 | * |
||
383 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
384 | * |
||
385 | * @throws SyntaxError if the port component is invalid |
||
386 | */ |
||
387 | 374 | private static function filterPort(string $port): ?int |
|
399 | |||
400 | /** |
||
401 | * Returns whether a hostname is valid. |
||
402 | * |
||
403 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
404 | * |
||
405 | * @throws SyntaxError if the registered name is invalid |
||
406 | */ |
||
407 | 362 | private static function filterHost(string $host): string |
|
423 | |||
424 | /** |
||
425 | * Returns whether the host is an IPv4 or a registered name. |
||
426 | * |
||
427 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
428 | * |
||
429 | * @throws SyntaxError if the registered name is invalid |
||
430 | * @throws IdnSupportMissing if IDN support or ICU requirement are not available or met. |
||
431 | */ |
||
432 | 324 | private static function filterRegisteredName(string $host): string |
|
500 | |||
501 | /** |
||
502 | * Retrieves and formats the IDNA conversion error message. |
||
503 | * |
||
504 | * @link http://icu-project.org/apiref/icu4j/com/ibm/icu/text/IDNA.Error.html |
||
505 | */ |
||
506 | 6 | private static function getIDNAErrors(int $error_byte): string |
|
536 | |||
537 | /** |
||
538 | * Validates an IPv6/IPvFuture host. |
||
539 | * |
||
540 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
541 | * @link https://tools.ietf.org/html/rfc6874#section-2 |
||
542 | * @link https://tools.ietf.org/html/rfc6874#section-4 |
||
543 | */ |
||
544 | 36 | private static function isIpHost(string $ip_host): bool |
|
567 | } |
||
568 |
This check marks PHPDoc comments that could not be parsed by our parser. To see which comment annotations we can parse, please refer to our documentation on supported doc-types.