Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | class Parser { |
||
14 | |||
15 | /** |
||
16 | * @var ClientInterface |
||
17 | */ |
||
18 | private $client; |
||
19 | |||
20 | /** |
||
21 | * @var array |
||
22 | */ |
||
23 | private static $cache; |
||
24 | |||
25 | /** |
||
26 | * Constructor |
||
27 | * @param ClientInterface $client HTTP Client |
||
28 | */ |
||
29 | public function __construct(ClientInterface $client) { |
||
32 | |||
33 | /** |
||
34 | * Parses a URL into an array of metatags |
||
35 | * |
||
36 | * @param string $url URL to parse |
||
37 | * @return array |
||
38 | */ |
||
39 | 1 | public function parse($url = '') { |
|
69 | |||
70 | /** |
||
71 | * Parses image metatags |
||
72 | * |
||
73 | * @param string $url URL of the image |
||
74 | * @return array|false |
||
75 | */ |
||
76 | 1 | public function getImageData($url = '') { |
|
87 | |||
88 | /** |
||
89 | * Parses OEmbed data |
||
90 | * |
||
91 | * @param string $url URL of the resource |
||
92 | * @return array|false |
||
93 | */ |
||
94 | 2 | public function getOEmbedData($url = '') { |
|
139 | |||
140 | /** |
||
141 | * Parses metatags from DOM |
||
142 | * |
||
143 | * @param string $url URL |
||
144 | * @return array|false |
||
145 | */ |
||
146 | 1 | public function getDOMData($url = '') { |
|
174 | |||
175 | /** |
||
176 | * Check if URL exists and is reachable by making an HTTP request to retrieve header information |
||
177 | * |
||
178 | * @param string $url URL of the resource |
||
179 | * @return boolean |
||
180 | */ |
||
181 | 1 | public function exists($url = '') { |
|
188 | |||
189 | /** |
||
190 | * Validate URL |
||
191 | * |
||
192 | * @param string $url URL to validate |
||
193 | * @return bool |
||
194 | */ |
||
195 | 1 | public function isValidUrl($url = '') { |
|
216 | |||
217 | /** |
||
218 | 1 | * Returns head of the resource |
|
219 | 1 | * |
|
220 | 1 | * @param string $url URL of the resource |
|
221 | 1 | * @return Response|false |
|
222 | */ |
||
223 | public function request($url = '') { |
||
240 | |||
241 | /** |
||
242 | * Get contents of the page |
||
243 | * |
||
244 | * @param string $url URL of the resource |
||
245 | * @return string |
||
246 | 1 | */ |
|
247 | 1 | public function read($url = '') { |
|
257 | 1 | ||
258 | 1 | /** |
|
259 | 1 | * Checks if resource is an html page |
|
260 | * |
||
261 | * @param string $url URL of the resource |
||
262 | * @return boolean |
||
263 | */ |
||
264 | public function isHTML($url = '') { |
||
268 | 1 | ||
269 | 1 | /** |
|
270 | 1 | * Checks if resource is JSON |
|
271 | 1 | * |
|
272 | 1 | * @param string $url URL of the resource |
|
273 | * @return boolean |
||
274 | */ |
||
275 | 1 | public function isJSON($url = '') { |
|
279 | |||
280 | /** |
||
281 | * Checks if resource is XML |
||
282 | * |
||
283 | * @param string $url URL of the resource |
||
284 | 1 | * @return boolean |
|
285 | 1 | */ |
|
286 | 1 | public function isXML($url = '') { |
|
290 | 1 | ||
291 | /** |
||
292 | 1 | * Checks if resource is an image |
|
293 | 1 | * |
|
294 | * @param string $url URL of the resource |
||
295 | * @return boolean |
||
296 | */ |
||
297 | public function isImage($url = '') { |
||
306 | 1 | ||
307 | /** |
||
308 | * Get mime type of the URL content |
||
309 | * |
||
310 | * @param string $url URL of the resource |
||
311 | * @return string |
||
312 | */ |
||
313 | public function getContentType($url = '') { |
||
324 | |||
325 | /** |
||
326 | 1 | * Returns HTML contents of the page |
|
327 | 1 | * |
|
328 | 1 | * @param string $url URL of the resource |
|
329 | 1 | * @return string |
|
330 | */ |
||
331 | public function getHTML($url = '') { |
||
337 | |||
338 | 1 | /** |
|
339 | 1 | * Returns HTML contents of the page as a DOMDocument |
|
340 | 1 | * |
|
341 | 1 | * @param string $url URL of the resource |
|
342 | * @return DOMDocument|false |
||
343 | */ |
||
344 | public function getDOM($url = '') { |
||
360 | |||
361 | 1 | /** |
|
362 | 1 | * Parses document title |
|
363 | 1 | * |
|
364 | * @param DOMDocument $doc Document |
||
365 | 1 | * @return string |
|
366 | 1 | */ |
|
367 | 1 | public function parseTitle(DOMDocument $doc) { |
|
372 | 1 | ||
373 | 1 | /** |
|
374 | 1 | * Parses <link> tags |
|
375 | * |
||
376 | 1 | * @param DOMDocument $doc Document |
|
377 | 1 | * @return array |
|
378 | 1 | */ |
|
379 | 1 | public function parseLinkTags(DOMDocument $doc) { |
|
417 | |||
418 | /** |
||
419 | * Parses <meta> tags |
||
420 | * |
||
421 | 1 | * @param DOMDocument $doc Document |
|
422 | 1 | * @return array |
|
423 | 1 | */ |
|
424 | 1 | public function parseMetaTags(DOMDocument $doc) { |
|
503 | 1 | ||
504 | 1 | /** |
|
505 | * Parses <img> tags |
||
506 | * |
||
507 | 1 | * @param DOMDocument $doc Document |
|
508 | * @return array |
||
509 | */ |
||
510 | 1 | public function parseImgTags(DOMDocument $doc) { |
|
525 | |||
526 | /** |
||
527 | * Normalizes relative URLs |
||
528 | * |
||
529 | * @param DOMDocument $doc Document |
||
530 | * @param string $href URL to normalize |
||
531 | * @return string|false |
||
532 | */ |
||
533 | public function getAbsoluteURL(DOMDocument $doc, $href = '') { |
||
558 | |||
559 | } |
||
560 |