Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 13 | class Parser { |
||
| 14 | |||
| 15 | /** |
||
| 16 | * @var ClientInterface |
||
| 17 | */ |
||
| 18 | private $client; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * @var array |
||
| 22 | */ |
||
| 23 | private static $cache; |
||
| 24 | |||
| 25 | /** |
||
| 26 | * Constructor |
||
| 27 | * @param ClientInterface $client HTTP Client |
||
| 28 | */ |
||
| 29 | public function __construct(ClientInterface $client) { |
||
| 32 | |||
| 33 | /** |
||
| 34 | * Parses a URL into an array of metatags |
||
| 35 | * |
||
| 36 | * @param string $url URL to parse |
||
| 37 | * @return array |
||
| 38 | */ |
||
| 39 | 1 | public function parse($url = '') { |
|
| 69 | |||
| 70 | /** |
||
| 71 | * Parses image metatags |
||
| 72 | * |
||
| 73 | * @param string $url URL of the image |
||
| 74 | * @return array|false |
||
| 75 | */ |
||
| 76 | 1 | public function getImageData($url = '') { |
|
| 87 | |||
| 88 | /** |
||
| 89 | * Parses OEmbed data |
||
| 90 | * |
||
| 91 | * @param string $url URL of the resource |
||
| 92 | * @return array|false |
||
| 93 | */ |
||
| 94 | 2 | public function getOEmbedData($url = '') { |
|
| 139 | |||
| 140 | /** |
||
| 141 | * Parses metatags from DOM |
||
| 142 | * |
||
| 143 | * @param string $url URL |
||
| 144 | * @return array|false |
||
| 145 | */ |
||
| 146 | 1 | public function getDOMData($url = '') { |
|
| 174 | |||
| 175 | /** |
||
| 176 | * Check if URL exists and is reachable by making an HTTP request to retrieve header information |
||
| 177 | * |
||
| 178 | * @param string $url URL of the resource |
||
| 179 | * @return boolean |
||
| 180 | */ |
||
| 181 | 1 | public function exists($url = '') { |
|
| 188 | |||
| 189 | /** |
||
| 190 | * Validate URL |
||
| 191 | * |
||
| 192 | * @param string $url URL to validate |
||
| 193 | * @return bool |
||
| 194 | */ |
||
| 195 | 1 | public function isValidUrl($url = '') { |
|
| 216 | |||
| 217 | /** |
||
| 218 | 1 | * Returns head of the resource |
|
| 219 | 1 | * |
|
| 220 | 1 | * @param string $url URL of the resource |
|
| 221 | 1 | * @return Response|false |
|
| 222 | */ |
||
| 223 | public function request($url = '') { |
||
| 240 | |||
| 241 | /** |
||
| 242 | * Get contents of the page |
||
| 243 | * |
||
| 244 | * @param string $url URL of the resource |
||
| 245 | * @return string |
||
| 246 | 1 | */ |
|
| 247 | 1 | public function read($url = '') { |
|
| 257 | 1 | ||
| 258 | 1 | /** |
|
| 259 | 1 | * Checks if resource is an html page |
|
| 260 | * |
||
| 261 | * @param string $url URL of the resource |
||
| 262 | * @return boolean |
||
| 263 | */ |
||
| 264 | public function isHTML($url = '') { |
||
| 268 | 1 | ||
| 269 | 1 | /** |
|
| 270 | 1 | * Checks if resource is JSON |
|
| 271 | 1 | * |
|
| 272 | 1 | * @param string $url URL of the resource |
|
| 273 | * @return boolean |
||
| 274 | */ |
||
| 275 | 1 | public function isJSON($url = '') { |
|
| 279 | |||
| 280 | /** |
||
| 281 | * Checks if resource is XML |
||
| 282 | * |
||
| 283 | * @param string $url URL of the resource |
||
| 284 | 1 | * @return boolean |
|
| 285 | 1 | */ |
|
| 286 | 1 | public function isXML($url = '') { |
|
| 290 | 1 | ||
| 291 | /** |
||
| 292 | 1 | * Checks if resource is an image |
|
| 293 | 1 | * |
|
| 294 | * @param string $url URL of the resource |
||
| 295 | * @return boolean |
||
| 296 | */ |
||
| 297 | public function isImage($url = '') { |
||
| 306 | 1 | ||
| 307 | /** |
||
| 308 | * Get mime type of the URL content |
||
| 309 | * |
||
| 310 | * @param string $url URL of the resource |
||
| 311 | * @return string |
||
| 312 | */ |
||
| 313 | public function getContentType($url = '') { |
||
| 324 | |||
| 325 | /** |
||
| 326 | 1 | * Returns HTML contents of the page |
|
| 327 | 1 | * |
|
| 328 | 1 | * @param string $url URL of the resource |
|
| 329 | 1 | * @return string |
|
| 330 | */ |
||
| 331 | public function getHTML($url = '') { |
||
| 337 | |||
| 338 | 1 | /** |
|
| 339 | 1 | * Returns HTML contents of the page as a DOMDocument |
|
| 340 | 1 | * |
|
| 341 | 1 | * @param string $url URL of the resource |
|
| 342 | * @return DOMDocument|false |
||
| 343 | */ |
||
| 344 | public function getDOM($url = '') { |
||
| 360 | |||
| 361 | 1 | /** |
|
| 362 | 1 | * Parses document title |
|
| 363 | 1 | * |
|
| 364 | * @param DOMDocument $doc Document |
||
| 365 | 1 | * @return string |
|
| 366 | 1 | */ |
|
| 367 | 1 | public function parseTitle(DOMDocument $doc) { |
|
| 372 | 1 | ||
| 373 | 1 | /** |
|
| 374 | 1 | * Parses <link> tags |
|
| 375 | * |
||
| 376 | 1 | * @param DOMDocument $doc Document |
|
| 377 | 1 | * @return array |
|
| 378 | 1 | */ |
|
| 379 | 1 | public function parseLinkTags(DOMDocument $doc) { |
|
| 417 | |||
| 418 | /** |
||
| 419 | * Parses <meta> tags |
||
| 420 | * |
||
| 421 | 1 | * @param DOMDocument $doc Document |
|
| 422 | 1 | * @return array |
|
| 423 | 1 | */ |
|
| 424 | 1 | public function parseMetaTags(DOMDocument $doc) { |
|
| 503 | 1 | ||
| 504 | 1 | /** |
|
| 505 | * Parses <img> tags |
||
| 506 | * |
||
| 507 | 1 | * @param DOMDocument $doc Document |
|
| 508 | * @return array |
||
| 509 | */ |
||
| 510 | 1 | public function parseImgTags(DOMDocument $doc) { |
|
| 525 | |||
| 526 | /** |
||
| 527 | * Normalizes relative URLs |
||
| 528 | * |
||
| 529 | * @param DOMDocument $doc Document |
||
| 530 | * @param string $href URL to normalize |
||
| 531 | * @return string|false |
||
| 532 | */ |
||
| 533 | public function getAbsoluteURL(DOMDocument $doc, $href = '') { |
||
| 558 | |||
| 559 | } |
||
| 560 |