Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser and, based on these observations, apply Extract Interface as well.
| 1 | <?php |
||
| 13 | class Parser { |
||
| 14 | |||
| 15 | /** |
||
| 16 | * @var ClientInterface |
||
| 17 | */ |
||
| 18 | private $client; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * @var array |
||
| 22 | */ |
||
| 23 | private static $cache; |
||
| 24 | |||
| 25 | /** |
||
| 26 | * @var array |
||
| 27 | */ |
||
| 28 | private $urls = []; |
||
|
|
|||
| 29 | |||
| 30 | /** |
||
| 31 | * Constructor |
||
| 32 | * @param ClientInterface $client HTTP Client |
||
| 33 | */ |
||
| 34 | public function __construct(ClientInterface $client) { |
||
| 37 | |||
| 38 | /** |
||
| 39 | * Parses a URL into an array of metatags |
||
| 40 | * |
||
| 41 | * @param string $url URL to parse |
||
| 42 | * @return array |
||
| 43 | */ |
||
| 44 | 2 | public function parse($url = '') { |
|
| 73 | |||
| 74 | /** |
||
| 75 | * Parses image metatags |
||
| 76 | * |
||
| 77 | * @param string $url URL of the image |
||
| 78 | * @return array|false |
||
| 79 | */ |
||
| 80 | 1 | public function getImageData($url = '') { |
|
| 91 | |||
| 92 | /** |
||
| 93 | * Parses OEmbed data |
||
| 94 | * |
||
| 95 | * @param string $url URL of the resource |
||
| 96 | * @return array|false |
||
| 97 | */ |
||
| 98 | 2 | public function getOEmbedData($url = '') { |
|
| 143 | |||
| 144 | /** |
||
| 145 | * Parses metatags from DOM |
||
| 146 | * |
||
| 147 | * @param string $url URL |
||
| 148 | * @return array|false |
||
| 149 | */ |
||
| 150 | 1 | public function getDOMData($url = '') { |
|
| 178 | |||
| 179 | /** |
||
| 180 | * Check if URL exists and is reachable by making an HTTP request to retrieve header information |
||
| 181 | * |
||
| 182 | * @param string $url URL of the resource |
||
| 183 | * @return boolean |
||
| 184 | */ |
||
| 185 | 1 | public function exists($url = '') { |
|
| 192 | |||
| 193 | /** |
||
| 194 | * Validate URL |
||
| 195 | * |
||
| 196 | * @param string $url URL to validate |
||
| 197 | * @return bool |
||
| 198 | */ |
||
| 199 | public function isValidUrl($url = '') { |
||
| 220 | |||
| 221 | /** |
||
| 222 | * Returns head of the resource |
||
| 223 | * |
||
| 224 | * @param string $url URL of the resource |
||
| 225 | * @return Response|false |
||
| 226 | */ |
||
| 227 | 1 | public function request($url = '') { |
|
| 244 | |||
| 245 | /** |
||
| 246 | * Get contents of the page |
||
| 247 | * |
||
| 248 | * @param string $url URL of the resource |
||
| 249 | * @return string |
||
| 250 | */ |
||
| 251 | 1 | public function read($url = '') { |
|
| 261 | |||
| 262 | /** |
||
| 263 | * Checks if resource is an html page |
||
| 264 | * |
||
| 265 | * @param string $url URL of the resource |
||
| 266 | * @return boolean |
||
| 267 | */ |
||
| 268 | 1 | public function isHTML($url = '') { |
|
| 272 | |||
| 273 | /** |
||
| 274 | * Checks if resource is JSON |
||
| 275 | * |
||
| 276 | * @param string $url URL of the resource |
||
| 277 | * @return boolean |
||
| 278 | */ |
||
| 279 | 1 | public function isJSON($url = '') { |
|
| 283 | |||
| 284 | /** |
||
| 285 | * Checks if resource is XML |
||
| 286 | * |
||
| 287 | * @param string $url URL of the resource |
||
| 288 | * @return boolean |
||
| 289 | */ |
||
| 290 | 1 | public function isXML($url = '') { |
|
| 294 | |||
| 295 | /** |
||
| 296 | * Checks if resource is an image |
||
| 297 | * |
||
| 298 | * @param string $url URL of the resource |
||
| 299 | * @return boolean |
||
| 300 | */ |
||
| 301 | 1 | public function isImage($url = '') { |
|
| 310 | |||
| 311 | /** |
||
| 312 | * Get mime type of the URL content |
||
| 313 | * |
||
| 314 | * @param string $url URL of the resource |
||
| 315 | * @return string |
||
| 316 | */ |
||
| 317 | 1 | public function getContentType($url = '') { |
|
| 328 | |||
| 329 | /** |
||
| 330 | * Returns HTML contents of the page |
||
| 331 | * |
||
| 332 | * @param string $url URL of the resource |
||
| 333 | * @return string |
||
| 334 | */ |
||
| 335 | 1 | public function getHTML($url = '') { |
|
| 341 | |||
| 342 | /** |
||
| 343 | * Returns HTML contents of the page as a DOMDocument |
||
| 344 | * |
||
| 345 | * @param string $url URL of the resource |
||
| 346 | * @return DOMDocument|false |
||
| 347 | */ |
||
| 348 | 1 | public function getDOM($url = '') { |
|
| 370 | |||
| 371 | /** |
||
| 372 | * Parses document title |
||
| 373 | * |
||
| 374 | * @param DOMDocument $doc Document |
||
| 375 | * @return string |
||
| 376 | */ |
||
| 377 | 1 | public function parseTitle(DOMDocument $doc) { |
|
| 382 | |||
| 383 | /** |
||
| 384 | * Parses <link> tags |
||
| 385 | * |
||
| 386 | * @param DOMDocument $doc Document |
||
| 387 | * @return array |
||
| 388 | */ |
||
| 389 | 1 | public function parseLinkTags(DOMDocument $doc) { |
|
| 430 | |||
| 431 | /** |
||
| 432 | * Parses <meta> tags |
||
| 433 | * |
||
| 434 | * @param DOMDocument $doc Document |
||
| 435 | * @return array |
||
| 436 | */ |
||
| 437 | 1 | public function parseMetaTags(DOMDocument $doc) { |
|
| 520 | |||
| 521 | /** |
||
| 522 | * Parses <img> tags |
||
| 523 | * |
||
| 524 | * @param DOMDocument $doc Document |
||
| 525 | * @return array |
||
| 526 | */ |
||
| 527 | 1 | public function parseImgTags(DOMDocument $doc) { |
|
| 544 | |||
| 545 | /** |
||
| 546 | * Normalizes relative URLs |
||
| 547 | * |
||
| 548 | * @param DOMDocument $doc Document |
||
| 549 | * @param string $href URL to normalize |
||
| 550 | * @return string|false |
||
| 551 | */ |
||
| 552 | 1 | public function getAbsoluteURL(DOMDocument $doc, $href = '') { |
|
| 579 | |||
| 580 | } |
This check marks private properties in classes that are never used. Those properties can be removed.