Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser and, based on these observations, to apply Extract Interface as well.
1 | <?php |
||
13 | class Parser { |
||
14 | |||
15 | /** |
||
16 | * @var ClientInterface |
||
17 | */ |
||
18 | private $client; |
||
19 | |||
20 | /** |
||
21 | * @var array |
||
22 | */ |
||
23 | private static $cache; |
||
24 | |||
25 | /** |
||
26 | * Constructor |
||
27 | * @param ClientInterface $client HTTP Client |
||
28 | */ |
||
29 | public function __construct(ClientInterface $client) { |
||
32 | |||
33 | /** |
||
34 | * Parses a URL into an array of metatags
||
35 | * |
||
36 | * @param string $url URL to parse |
||
37 | * @return array |
||
38 | */ |
||
39 | 1 | public function parse($url = '') { |
|
69 | |||
70 | /** |
||
71 | * Parses image metatags |
||
72 | * |
||
73 | * @param string $url URL of the image |
||
74 | * @return array|false |
||
75 | */ |
||
76 | 1 | public function getImageData($url = '') { |
|
87 | |||
88 | /** |
||
89 | * Parses OEmbed data |
||
90 | * |
||
91 | * @param string $url URL of the image |
||
92 | * @return array|false |
||
93 | */ |
||
94 | 2 | public function getOEmbedData($url = '') { |
|
139 | |||
140 | /** |
||
141 | * Parses metatags from DOM |
||
142 | * |
||
143 | * @param string $url URL |
||
144 | * @return array|false |
||
145 | */ |
||
146 | 1 | public function getDOMData($url = '') { |
|
174 | |||
175 | /** |
||
176 | * Check if URL exists and is reachable by making an HTTP request to retrieve header information |
||
177 | * |
||
178 | * @param string $url URL of the resource |
||
179 | * @return boolean |
||
180 | */ |
||
181 | 1 | public function exists($url = '') { |
|
188 | |||
189 | /** |
||
190 | * Returns head of the resource |
||
191 | * |
||
192 | * @param string $url URL of the resource |
||
193 | * @return Response|false |
||
194 | */ |
||
195 | 1 | public function request($url = '') { |
|
211 | |||
212 | /** |
||
213 | * Get contents of the page |
||
214 | * |
||
215 | * @param string $url URL of the resource |
||
216 | * @return string |
||
217 | */ |
||
218 | 1 | public function read($url = '') { |
|
228 | |||
229 | /** |
||
230 | * Checks if resource is an html page |
||
231 | * |
||
232 | * @param string $url URL of the resource |
||
233 | * @return boolean |
||
234 | */ |
||
235 | 1 | public function isHTML($url = '') { |
|
239 | |||
240 | /** |
||
241 | * Checks if resource is JSON |
||
242 | * |
||
243 | * @param string $url URL of the resource |
||
244 | * @return boolean |
||
245 | */ |
||
246 | 1 | public function isJSON($url = '') { |
|
250 | |||
251 | /** |
||
252 | * Checks if resource is XML |
||
253 | * |
||
254 | * @param string $url URL of the resource |
||
255 | * @return boolean |
||
256 | */ |
||
257 | 1 | public function isXML($url = '') { |
|
261 | |||
262 | /** |
||
263 | * Checks if resource is an image |
||
264 | * |
||
265 | * @param string $url URL of the resource |
||
266 | * @return boolean |
||
267 | */ |
||
268 | 1 | public function isImage($url = '') { |
|
277 | |||
278 | /** |
||
279 | * Get mime type of the URL content |
||
280 | * |
||
281 | * @param string $url URL of the resource |
||
282 | * @return string |
||
283 | */ |
||
284 | 1 | public function getContentType($url = '') { |
|
295 | |||
296 | /** |
||
297 | * Returns HTML contents of the page |
||
298 | * |
||
299 | * @param string $url URL of the resource |
||
300 | * @return string |
||
301 | */ |
||
302 | 1 | public function getHTML($url = '') { |
|
308 | |||
309 | /** |
||
310 | * Returns HTML contents of the page as a DOMDocument |
||
311 | * |
||
312 | * @param string $url URL of the resource |
||
313 | * @return DOMDocument|false |
||
314 | */ |
||
315 | 1 | public function getDOM($url = '') { |
|
331 | |||
332 | /** |
||
333 | * Parses document title |
||
334 | * |
||
335 | * @param DOMDocument $doc Document |
||
336 | * @return string |
||
337 | */ |
||
338 | 1 | public function parseTitle(DOMDocument $doc) { |
|
343 | |||
344 | /** |
||
345 | * Parses <link> tags |
||
346 | * |
||
347 | * @param DOMDocument $doc Document |
||
348 | * @return array |
||
349 | */ |
||
350 | 1 | public function parseLinkTags(DOMDocument $doc) { |
|
385 | |||
386 | /** |
||
387 | * Parses <meta> tags |
||
388 | * |
||
389 | * @param DOMDocument $doc Document |
||
390 | * @return array |
||
391 | */ |
||
392 | 1 | public function parseMetaTags(DOMDocument $doc) { |
|
468 | |||
469 | /** |
||
470 | * Parses <img> tags |
||
471 | * |
||
472 | * @param DOMDocument $doc Document |
||
473 | * @return array |
||
474 | */ |
||
475 | 1 | public function parseImgTags(DOMDocument $doc) { |
|
487 | |||
488 | /** |
||
489 | * Normalizes relative URLs |
||
490 | * |
||
491 | * @param DOMDocument $doc Document |
||
492 | * @param string $href URL to normalize |
||
493 | * @return string |
||
494 | */ |
||
495 | 1 | public function getAbsoluteURL(DOMDocument $doc, $href = '') { |
|
514 | |||
515 | } |
||
516 |
As per the PSR-2 coding standard, there must not be a space in front of the colon in case statements.
To learn more about the PSR-2 coding standard, please refer to the PHP-FIG.