Complex classes like Uri often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Uri, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
74 | final class Uri implements UriInterface |
||
75 | { |
||
76 | /** |
||
77 | * RFC3986 invalid characters. |
||
78 | * |
||
79 | * @link https://tools.ietf.org/html/rfc3986#section-2.2 |
||
80 | * |
||
81 | * @var string |
||
82 | */ |
||
83 | private const REGEXP_INVALID_CHARS = '/[\x00-\x1f\x7f]/'; |
||
84 | |||
85 | /** |
||
86 | * RFC3986 Sub delimiter characters regular expression pattern. |
||
87 | * |
||
88 | * @link https://tools.ietf.org/html/rfc3986#section-2.2 |
||
89 | * |
||
90 | * @var string |
||
91 | */ |
||
92 | private const REGEXP_CHARS_SUBDELIM = "\!\$&'\(\)\*\+,;\=%"; |
||
93 | |||
94 | /** |
||
95 | * RFC3986 unreserved characters regular expression pattern. |
||
96 | * |
||
97 | * @link https://tools.ietf.org/html/rfc3986#section-2.3 |
||
98 | * |
||
99 | * @var string |
||
100 | */ |
||
101 | private const REGEXP_CHARS_UNRESERVED = 'A-Za-z0-9_\-\.~'; |
||
102 | |||
103 | /** |
||
104 | * RFC3986 scheme regular expression pattern. |
||
105 | * |
||
106 | * @link https://tools.ietf.org/html/rfc3986#section-3.1 |
||
107 | */ |
||
108 | private const REGEXP_SCHEME = ',^[a-z]([-a-z0-9+.]+)?$,i'; |
||
109 | |||
110 | /** |
||
111 | * RFC3986 host identified by a registered name regular expression pattern. |
||
112 | * |
||
113 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
114 | */ |
||
115 | private const REGEXP_HOST_REGNAME = '/^( |
||
116 | (?<unreserved>[a-z0-9_~\-\.])| |
||
117 | (?<sub_delims>[!$&\'()*+,;=])| |
||
118 | (?<encoded>%[A-F0-9]{2}) |
||
119 | )+$/x'; |
||
120 | |||
121 | /** |
||
122 | * RFC3986 delimiters of the generic URI components regular expression pattern. |
||
123 | * |
||
124 | * @link https://tools.ietf.org/html/rfc3986#section-2.2 |
||
125 | */ |
||
126 | private const REGEXP_HOST_GEN_DELIMS = '/[:\/?#\[\]@ ]/'; // Also includes space. |
||
127 | |||
128 | /** |
||
129 | * RFC3986 IPvFuture regular expression pattern. |
||
130 | * |
||
131 | * @link https://tools.ietf.org/html/rfc3986#section-3.2.2 |
||
132 | */ |
||
133 | private const REGEXP_HOST_IPFUTURE = '/^ |
||
134 | v(?<version>[A-F0-9])+\. |
||
135 | (?: |
||
136 | (?<unreserved>[a-z0-9_~\-\.])| |
||
137 | (?<sub_delims>[!$&\'()*+,;=:]) # also include the : character |
||
138 | )+ |
||
139 | $/ix'; |
||
140 | |||
141 | /** |
||
142 | * Significant 10 bits of an IP address, used to detect a Zone ID. |
||
143 | */ |
||
144 | private const HOST_ADDRESS_BLOCK = "\xfe\x80"; |
||
145 | |||
146 | /** |
||
147 | * Regular expression pattern for file URI. |
||
148 | */ |
||
149 | private const REGEXP_FILE_PATH = ',^(?<delim>/)?(?<root>[a-zA-Z][:|\|])(?<rest>.*)?,'; |
||
150 | |||
151 | /** |
||
152 | * Mimetype regular expression pattern. |
||
153 | * |
||
154 | * @link https://tools.ietf.org/html/rfc2397 |
||
155 | */ |
||
156 | private const REGEXP_MIMETYPE = ',^\w+/[-.\w]+(?:\+[-.\w]+)?$,'; |
||
157 | |||
158 | /** |
||
159 | * Base64 content regular expression pattern. |
||
160 | * |
||
161 | * @link https://tools.ietf.org/html/rfc2397 |
||
162 | */ |
||
163 | private const REGEXP_BINARY = ',(;|^)base64$,'; |
||
164 | |||
165 | /** |
||
166 | * Windows file path string regular expression pattern. |
||
167 | */ |
||
168 | private const REGEXP_WINDOW_PATH = ',^(?<root>[a-zA-Z][:|\|]),'; |
||
169 | |||
170 | |||
171 | /** |
||
172 | * Supported schemes and corresponding default port. |
||
173 | * |
||
174 | * @var array |
||
175 | */ |
||
176 | private const SCHEME_DEFAULT_PORT = [ |
||
177 | 'data' => null, |
||
178 | 'file' => null, |
||
179 | 'ftp' => 21, |
||
180 | 'gopher' => 70, |
||
181 | 'http' => 80, |
||
182 | 'https' => 443, |
||
183 | 'ws' => 80, |
||
184 | 'wss' => 443, |
||
185 | ]; |
||
186 | |||
187 | /** |
||
188 | * URI validation methods per scheme. |
||
189 | * |
||
190 | * @var array |
||
191 | */ |
||
192 | private const SCHEME_VALIDATION_METHOD = [ |
||
193 | 'data' => 'isUriWithSchemeAndPathOnly', |
||
194 | 'file' => 'isUriWithSchemeHostAndPathOnly', |
||
195 | 'ftp' => 'isNonEmptyHostUriWithoutFragmentAndQuery', |
||
196 | 'gopher' => 'isNonEmptyHostUriWithoutFragmentAndQuery', |
||
197 | 'http' => 'isNonEmptyHostUri', |
||
198 | 'https' => 'isNonEmptyHostUri', |
||
199 | 'ws' => 'isNonEmptyHostUriWithoutFragment', |
||
200 | 'wss' => 'isNonEmptyHostUriWithoutFragment', |
||
201 | ]; |
||
202 | |||
203 | /** |
||
204 | * All ASCII letters sorted by typical frequency of occurrence. |
||
205 | * |
||
206 | * @var string |
||
207 | */ |
||
208 | private const ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; |
||
209 | |||
210 | /** |
||
211 | * URI scheme component. |
||
212 | * |
||
213 | * @var string|null |
||
214 | */ |
||
215 | private $scheme; |
||
216 | |||
217 | /** |
||
218 | * URI user info part. |
||
219 | * |
||
220 | * @var string|null |
||
221 | */ |
||
222 | private $user_info; |
||
223 | |||
224 | /** |
||
225 | * URI host component. |
||
226 | * |
||
227 | * @var string|null |
||
228 | */ |
||
229 | private $host; |
||
230 | |||
231 | /** |
||
232 | * URI port component. |
||
233 | * |
||
234 | * @var int|null |
||
235 | */ |
||
236 | private $port; |
||
237 | |||
238 | /** |
||
239 | * URI authority string representation. |
||
240 | * |
||
241 | * @var string|null |
||
242 | */ |
||
243 | private $authority; |
||
244 | |||
245 | /** |
||
246 | * URI path component. |
||
247 | * |
||
248 | * @var string |
||
249 | */ |
||
250 | private $path = ''; |
||
251 | |||
252 | /** |
||
253 | * URI query component. |
||
254 | * |
||
255 | * @var string|null |
||
256 | */ |
||
257 | private $query; |
||
258 | |||
259 | /** |
||
260 | * URI fragment component. |
||
261 | * |
||
262 | * @var string|null |
||
263 | */ |
||
264 | private $fragment; |
||
265 | |||
266 | /** |
||
267 | * URI string representation. |
||
268 | * |
||
269 | * @var string|null |
||
270 | */ |
||
271 | private $uri; |
||
272 | |||
273 | /** |
||
274 | * Create a new instance. |
||
275 | * |
||
276 | * @param ?string $scheme |
||
|
|||
277 | * @param ?string $user |
||
278 | * @param ?string $pass |
||
279 | * @param ?string $host |
||
280 | * @param ?int $port |
||
281 | * @param ?string $query |
||
282 | * @param ?string $fragment |
||
283 | */ |
||
284 | 308 | private function __construct( |
|
304 | |||
305 | /** |
||
306 | * Format the Scheme and Host component. |
||
307 | * |
||
308 | * @param ?string $scheme |
||
309 | * |
||
310 | * @throws SyntaxError if the scheme is invalid |
||
311 | */ |
||
312 | 314 | private function formatScheme(?string $scheme): ?string |
|
325 | |||
326 | /** |
||
327 | * Set the UserInfo component. |
||
328 | * |
||
329 | * @param ?string $user |
||
330 | * @param ?string $password |
||
331 | */ |
||
332 | 322 | private function formatUserInfo(?string $user, ?string $password): ?string |
|
348 | |||
349 | /** |
||
350 | * Returns the RFC3986 encoded string matched. |
||
351 | */ |
||
352 | 12 | private static function urlEncodeMatch(array $matches): string |
|
356 | |||
357 | /** |
||
358 | * Validate and Format the Host component. |
||
359 | * |
||
360 | * @param ?string $host |
||
361 | */ |
||
362 | 340 | private function formatHost(?string $host): ?string |
|
374 | |||
375 | /** |
||
376 | * Validate and format a registered name. |
||
377 | * |
||
378 | * The host is converted to its ascii representation if needed |
||
379 | * |
||
380 | * @throws IdnSupportMissing if the submitted host requires missing or misconfigured IDN support |
||
381 | * @throws SyntaxError if the submitted host is not a valid registered name |
||
382 | */ |
||
383 | 274 | private function formatRegisteredName(string $host): string |
|
458 | |||
459 | /** |
||
460 | * Retrieves and formats the IDNA conversion error message. |
||
461 | * |
||
462 | * @link http://icu-project.org/apiref/icu4j/com/ibm/icu/text/IDNA.Error.html |
||
463 | */ |
||
464 | 4 | private function getIDNAErrors(int $error_byte): string |
|
494 | |||
495 | /** |
||
496 | * Validate and Format the IPv6/IPvfuture host. |
||
497 | * |
||
498 | * @throws SyntaxError if the submitted host is not a valid IP host |
||
499 | */ |
||
500 | 16 | private function formatIp(string $host): string |
|
533 | |||
534 | /** |
||
535 | * Format the Port component. |
||
536 | * |
||
537 | * @param null|mixed $port |
||
538 | * |
||
539 | * @throws SyntaxError |
||
540 | */ |
||
541 | 336 | private function formatPort($port = null): ?int |
|
563 | |||
564 | /** |
||
565 | * {@inheritDoc} |
||
566 | */ |
||
567 | 18 | public static function __set_state(array $components): self |
|
586 | |||
587 | /** |
||
588 | * Create a new instance from a URI and a Base URI. |
||
589 | * |
||
590 | * The returned URI must be absolute. |
||
591 | * |
||
592 | * @param mixed $uri the input URI to create |
||
593 | * @param null|mixed $base_uri the base URI used for reference |
||
594 | */ |
||
595 | 86 | public static function createFromBaseUri($uri, $base_uri = null): UriInterface |
|
629 | |||
630 | /** |
||
631 | * Create a new instance from a string. |
||
632 | * |
||
633 | * @param string|mixed $uri |
||
634 | */ |
||
635 | 286 | public static function createFromString($uri = ''): self |
|
650 | |||
651 | /** |
||
652 | * Create a new instance from a hash of parse_url parts. |
||
653 | * |
||
654 | * Create a new instance from a hash representation of the URI similar |
||
655 | * to PHP parse_url function result |
||
656 | * |
||
657 | * @param array<string, mixed> $components |
||
658 | */ |
||
659 | 90 | public static function createFromComponents(array $components = []): self |
|
677 | |||
678 | /** |
||
679 | * Create a new instance from a data file path. |
||
680 | * |
||
681 | * @param resource|null $context |
||
682 | * |
||
683 | * @throws FileinfoSupportMissing If ext/fileinfo is not installed |
||
684 | * @throws SyntaxError If the file does not exist or is not readable |
||
685 | */ |
||
686 | 6 | public static function createFromDataPath(string $path, $context = null): self |
|
716 | |||
717 | /** |
||
718 | * Create a new instance from a Unix path string. |
||
719 | */ |
||
720 | 10 | public static function createFromUnixPath(string $uri = ''): self |
|
729 | |||
730 | /** |
||
731 | * Create a new instance from a local Windows path string. |
||
732 | */ |
||
733 | 16 | public static function createFromWindowsPath(string $uri = ''): self |
|
757 | |||
758 | /** |
||
759 | * Create a new instance from a URI object. |
||
760 | * |
||
761 | * @param Psr7UriInterface|UriInterface $uri the input URI to create |
||
762 | */ |
||
763 | 4 | public static function createFromUri($uri): self |
|
827 | |||
828 | /** |
||
829 | * Create a new instance from the environment. |
||
830 | */ |
||
831 | 26 | public static function createFromServer(array $server): self |
|
847 | |||
848 | /** |
||
849 | * Returns the environment scheme. |
||
850 | */ |
||
851 | 26 | private static function fetchScheme(array $server): string |
|
858 | |||
859 | /** |
||
860 | * Returns the environment user info. |
||
861 | * |
||
862 | * @return array{0:?string, 1:?string} |
||
863 | */ |
||
864 | 28 | private static function fetchUserInfo(array $server): array |
|
887 | |||
888 | /** |
||
889 | * Returns the environment host. |
||
890 | * |
||
891 | * @throws SyntaxError If the host can not be detected |
||
892 | * |
||
893 | * @return array{0:?string, 1:?string} |
||
894 | */ |
||
895 | 28 | private static function fetchHostname(array $server): array |
|
921 | |||
922 | /** |
||
923 | * Returns the environment path. |
||
924 | * |
||
925 | * @return array{0:?string, 1:?string} |
||
926 | */ |
||
927 | 26 | private static function fetchRequestUri(array $server): array |
|
946 | |||
947 | /** |
||
948 | * Generate the URI authority part. |
||
949 | */ |
||
950 | 310 | private function setAuthority(): ?string |
|
967 | |||
968 | /** |
||
969 | * Format the Path component. |
||
970 | */ |
||
971 | 326 | private function formatPath(string $path): string |
|
981 | |||
982 | /** |
||
983 | * Filter the Path component. |
||
984 | * |
||
985 | * @link https://tools.ietf.org/html/rfc2397 |
||
986 | * |
||
987 | * @throws SyntaxError If the path is not compliant with RFC2397 |
||
988 | */ |
||
989 | 340 | private function formatDataPath(string $path): string |
|
1020 | |||
1021 | /** |
||
1022 | * Assert the path is compliant with RFC2397. |
||
1023 | * |
||
1024 | * @link https://tools.ietf.org/html/rfc2397 |
||
1025 | * |
||
1026 | * @throws SyntaxError If the mediatype or the data are not compliant with the RFC2397 |
||
1027 | */ |
||
1028 | 22 | private function assertValidPath(string $mimetype, string $parameters, string $data): void |
|
1053 | |||
1054 | /** |
||
1055 | * Validate mediatype parameter. |
||
1056 | */ |
||
1057 | 4 | private function validateParameter(string $parameter): bool |
|
1063 | |||
1064 | /** |
||
1065 | * Format path component for file scheme. |
||
1066 | */ |
||
1067 | 334 | private function formatFilePath(string $path): string |
|
1079 | |||
1080 | /** |
||
1081 | * Format the Query or the Fragment component. |
||
1082 | * |
||
1083 | * Returns a array containing: |
||
1084 | * <ul> |
||
1085 | * <li> the formatted component (a string or null)</li> |
||
1086 | * <li> a boolean flag telling whether the delimiter is to be added to the component |
||
1087 | * when building the URI string representation</li> |
||
1088 | * </ul> |
||
1089 | * |
||
1090 | * @param ?string $component |
||
1091 | */ |
||
1092 | 320 | private function formatQueryAndFragment(?string $component): ?string |
|
1101 | |||
1102 | /** |
||
1103 | * Assert the URI internal state is valid. |
||
1104 | * |
||
1105 | * @link https://tools.ietf.org/html/rfc3986#section-3 |
||
1106 | * @link https://tools.ietf.org/html/rfc3986#section-3.3 |
||
1107 | * |
||
1108 | * @throws SyntaxError if the URI is in an invalid state according to RFC3986 |
||
1109 | * @throws SyntaxError if the URI is in an invalid state according to scheme specific rules |
||
1110 | */ |
||
1111 | 368 | private function assertValidState(): void |
|
1139 | |||
1140 | /** |
||
1141 | * URI validation for URI schemes which allow only scheme and path components. |
||
1142 | */ |
||
1143 | 2 | private function isUriWithSchemeAndPathOnly(): bool |
|
1149 | |||
1150 | /** |
||
1151 | * URI validation for URI schemes which allow only scheme, host and path components. |
||
1152 | */ |
||
1153 | 22 | private function isUriWithSchemeHostAndPathOnly(): bool |
|
1161 | |||
1162 | /** |
||
1163 | * URI validation for URI schemes which disallow the empty '' host. |
||
1164 | */ |
||
1165 | 260 | private function isNonEmptyHostUri(): bool |
|
1170 | |||
1171 | /** |
||
1172 | * URI validation for URI schemes which disallow the empty '' host |
||
1173 | * and forbid the fragment component. |
||
1174 | */ |
||
1175 | 18 | private function isNonEmptyHostUriWithoutFragment(): bool |
|
1179 | |||
1180 | /** |
||
1181 | * URI validation for URI schemes which disallow the empty '' host |
||
1182 | * and forbid fragment and query components. |
||
1183 | */ |
||
1184 | 22 | private function isNonEmptyHostUriWithoutFragmentAndQuery(): bool |
|
1188 | |||
1189 | /** |
||
1190 | * Generate the URI string representation from its components. |
||
1191 | * |
||
1192 | * @link https://tools.ietf.org/html/rfc3986#section-5.3 |
||
1193 | * |
||
1194 | * @param ?string $scheme |
||
1195 | * @param ?string $authority |
||
1196 | * @param ?string $query |
||
1197 | * @param ?string $fragment |
||
1198 | */ |
||
1199 | 262 | private function getUriString( |
|
1224 | |||
1225 | /** |
||
1226 | * {@inheritDoc} |
||
1227 | */ |
||
1228 | 272 | public function __toString(): string |
|
1240 | |||
1241 | /** |
||
1242 | * {@inheritDoc} |
||
1243 | */ |
||
1244 | 2 | public function jsonSerialize(): string |
|
1248 | |||
1249 | /** |
||
1250 | * {@inheritDoc} |
||
1251 | * |
||
1252 | * @return array{scheme:?string, user_info:?string, host:?string, port:?int, path:string, query:?string, fragment:?string} |
||
1253 | */ |
||
1254 | 2 | public function __debugInfo(): array |
|
1266 | |||
1267 | /** |
||
1268 | * {@inheritDoc} |
||
1269 | */ |
||
1270 | 244 | public function getScheme(): ?string |
|
1274 | |||
1275 | /** |
||
1276 | * {@inheritDoc} |
||
1277 | */ |
||
1278 | 198 | public function getAuthority(): ?string |
|
1282 | |||
1283 | /** |
||
1284 | * {@inheritDoc} |
||
1285 | */ |
||
1286 | 96 | public function getUserInfo(): ?string |
|
1290 | |||
1291 | /** |
||
1292 | * {@inheritDoc} |
||
1293 | */ |
||
1294 | 208 | public function getHost(): ?string |
|
1298 | |||
1299 | /** |
||
1300 | * {@inheritDoc} |
||
1301 | */ |
||
1302 | 238 | public function getPort(): ?int |
|
1306 | |||
1307 | /** |
||
1308 | * {@inheritDoc} |
||
1309 | */ |
||
1310 | 206 | public function getPath(): string |
|
1314 | |||
1315 | /** |
||
1316 | * {@inheritDoc} |
||
1317 | */ |
||
1318 | 114 | public function getQuery(): ?string |
|
1322 | |||
1323 | /** |
||
1324 | * {@inheritDoc} |
||
1325 | */ |
||
1326 | 26 | public function getFragment(): ?string |
|
1330 | |||
1331 | /** |
||
1332 | * {@inheritDoc} |
||
1333 | */ |
||
1334 | 148 | public function withScheme($scheme): UriInterface |
|
1349 | |||
1350 | /** |
||
1351 | * Filter a string. |
||
1352 | * |
||
1353 | * @param mixed $str the value to evaluate as a string |
||
1354 | * |
||
1355 | * @throws SyntaxError if the submitted data can not be converted to string |
||
1356 | */ |
||
1357 | 216 | private function filterString($str): ?string |
|
1378 | |||
1379 | /** |
||
1380 | * {@inheritDoc} |
||
1381 | */ |
||
1382 | 154 | public function withUserInfo($user, $password = null): UriInterface |
|
1405 | |||
1406 | /** |
||
1407 | * {@inheritDoc} |
||
1408 | */ |
||
1409 | 178 | public function withHost($host): UriInterface |
|
1423 | |||
1424 | /** |
||
1425 | * {@inheritDoc} |
||
1426 | */ |
||
1427 | 136 | public function withPort($port): UriInterface |
|
1441 | |||
1442 | /** |
||
1443 | * {@inheritDoc} |
||
1444 | */ |
||
1445 | 180 | public function withPath($path): UriInterface |
|
1463 | |||
1464 | /** |
||
1465 | * {@inheritDoc} |
||
1466 | */ |
||
1467 | 112 | public function withQuery($query): UriInterface |
|
1480 | |||
1481 | /** |
||
1482 | * {@inheritDoc} |
||
1483 | */ |
||
1484 | 32 | public function withFragment($fragment): UriInterface |
|
1497 | } |
||
1498 |
This check marks PHPDoc comments that could not be parsed by our parser. To see which comment annotations we can parse, please refer to our documentation on supported doc-types.