Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefix or suffix. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser and, based on these observations, to apply Extract Interface as well.
1 | <?php |
||
13 | class Parser { |
||
14 | |||
15 | /** |
||
16 | * @var ClientInterface |
||
17 | */ |
||
18 | private $client; |
||
19 | |||
20 | /** |
||
21 | * @var array |
||
22 | */ |
||
23 | private static $cache; |
||
24 | |||
25 | /** |
||
26 | * @var array |
||
27 | */ |
||
28 | private $urls = []; |
||
|
|||
29 | |||
30 | /** |
||
31 | * Constructor |
||
32 | * @param ClientInterface $client HTTP Client |
||
33 | */ |
||
34 | public function __construct(ClientInterface $client) { |
||
37 | |||
38 | /** |
||
39 | * Parses a URL into an array of metatags |
||
40 | * |
||
41 | * @param string $url URL to parse |
||
42 | * @return array |
||
43 | */ |
||
44 | 2 | public function parse($url = '') { |
|
73 | |||
74 | /** |
||
75 | * Parses image metatags |
||
76 | * |
||
77 | * @param string $url URL of the image |
||
78 | * @return array|false |
||
79 | */ |
||
80 | 1 | public function getImageData($url = '') { |
|
91 | |||
92 | /** |
||
93 | * Parses OEmbed data |
||
94 | * |
||
95 | * @param string $url URL of the image |
||
96 | * @return array|false |
||
97 | */ |
||
98 | 2 | public function getOEmbedData($url = '') { |
|
143 | |||
144 | /** |
||
145 | * Parses metatags from DOM |
||
146 | * |
||
147 | * @param string $url URL |
||
148 | * @return array|false |
||
149 | */ |
||
150 | 1 | public function getDOMData($url = '') { |
|
178 | |||
179 | /** |
||
180 | * Check if URL exists and is reachable by making an HTTP request to retrieve header information |
||
181 | * |
||
182 | * @param string $url URL of the resource |
||
183 | * @return boolean |
||
184 | */ |
||
185 | 1 | public function exists($url = '') { |
|
192 | |||
193 | /** |
||
194 | * Validate URL |
||
195 | * |
||
196 | * @param string $url URL to validate |
||
197 | * @return bool |
||
198 | */ |
||
199 | public function isValidUrl($url = '') { |
||
220 | |||
221 | /** |
||
222 | * Returns head of the resource |
||
223 | * |
||
224 | * @param string $url URL of the resource |
||
225 | * @return Response|false |
||
226 | */ |
||
227 | 1 | public function request($url = '') { |
|
244 | |||
245 | /** |
||
246 | * Get contents of the page |
||
247 | * |
||
248 | * @param string $url URL of the resource |
||
249 | * @return string |
||
250 | */ |
||
251 | 1 | public function read($url = '') { |
|
261 | |||
262 | /** |
||
263 | * Checks if resource is an html page |
||
264 | * |
||
265 | * @param string $url URL of the resource |
||
266 | * @return boolean |
||
267 | */ |
||
268 | 1 | public function isHTML($url = '') { |
|
272 | |||
273 | /** |
||
274 | * Checks if resource is JSON |
||
275 | * |
||
276 | * @param string $url URL of the resource |
||
277 | * @return boolean |
||
278 | */ |
||
279 | 1 | public function isJSON($url = '') { |
|
283 | |||
284 | /** |
||
285 | * Checks if resource is XML |
||
286 | * |
||
287 | * @param string $url URL of the resource |
||
288 | * @return boolean |
||
289 | */ |
||
290 | 1 | public function isXML($url = '') { |
|
294 | |||
295 | /** |
||
296 | * Checks if resource is an image |
||
297 | * |
||
298 | * @param string $url URL of the resource |
||
299 | * @return boolean |
||
300 | */ |
||
301 | 1 | public function isImage($url = '') { |
|
310 | |||
311 | /** |
||
312 | * Get mime type of the URL content |
||
313 | * |
||
314 | * @param string $url URL of the resource |
||
315 | * @return string |
||
316 | */ |
||
317 | 1 | public function getContentType($url = '') { |
|
328 | |||
329 | /** |
||
330 | * Returns HTML contents of the page |
||
331 | * |
||
332 | * @param string $url URL of the resource |
||
333 | * @return string |
||
334 | */ |
||
335 | 1 | public function getHTML($url = '') { |
|
341 | |||
342 | /** |
||
343 | * Returns HTML contents of the page as a DOMDocument |
||
344 | * |
||
345 | * @param string $url URL of the resource |
||
346 | * @return DOMDocument|false |
||
347 | */ |
||
348 | 1 | public function getDOM($url = '') { |
|
370 | |||
371 | /** |
||
372 | * Parses document title |
||
373 | * |
||
374 | * @param DOMDocument $doc Document |
||
375 | * @return string |
||
376 | */ |
||
377 | 1 | public function parseTitle(DOMDocument $doc) { |
|
382 | |||
383 | /** |
||
384 | * Parses <link> tags |
||
385 | * |
||
386 | * @param DOMDocument $doc Document |
||
387 | * @return array |
||
388 | */ |
||
389 | 1 | public function parseLinkTags(DOMDocument $doc) { |
|
430 | |||
431 | /** |
||
432 | * Parses <meta> tags |
||
433 | * |
||
434 | * @param DOMDocument $doc Document |
||
435 | * @return array |
||
436 | */ |
||
437 | 1 | public function parseMetaTags(DOMDocument $doc) { |
|
520 | |||
521 | /** |
||
522 | * Parses <img> tags |
||
523 | * |
||
524 | * @param DOMDocument $doc Document |
||
525 | * @return array |
||
526 | */ |
||
527 | 1 | public function parseImgTags(DOMDocument $doc) { |
|
544 | |||
545 | /** |
||
546 | * Normalizes relative URLs |
||
547 | * |
||
548 | * @param DOMDocument $doc Document |
||
549 | * @param string $href URL to normalize |
||
550 | * @return string|false |
||
551 | */ |
||
552 | 1 | public function getAbsoluteURL(DOMDocument $doc, $href = '') { |
|
579 | |||
580 | } |
This check marks private properties that are never used within their class. Those properties can be removed.