Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 13 | class Parser { |
||
| 14 | |||
| 15 | /** |
||
| 16 | * @var ClientInterface |
||
| 17 | */ |
||
| 18 | private $client; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * @var array |
||
| 22 | */ |
||
| 23 | private static $cache; |
||
| 24 | |||
| 25 | /** |
||
| 26 | * Constructor |
||
| 27 | * @param ClientInterface $client HTTP Client |
||
| 28 | */ |
||
| 29 | public function __construct(ClientInterface $client) { |
||
| 32 | |||
| 33 | /** |
||
| 34 | * Parses a URL into an array of metatags |
||
| 35 | * |
||
| 36 | * @param string $url URL to parse |
||
| 37 | * @return array |
||
| 38 | */ |
||
| 39 | 1 | public function parse($url = '') { |
|
| 69 | |||
| 70 | /** |
||
| 71 | * Parses image metatags |
||
| 72 | * |
||
| 73 | * @param string $url URL of the image |
||
| 74 | * @return array|false |
||
| 75 | */ |
||
| 76 | 1 | public function getImageData($url = '') { |
|
| 87 | |||
| 88 | /** |
||
| 89 | * Parses OEmbed data |
||
| 90 | * |
||
| 91 | * @param string $url URL of the resource |
||
| 92 | * @return array|false |
||
| 93 | */ |
||
| 94 | 2 | public function getOEmbedData($url = '') { |
|
| 139 | |||
| 140 | /** |
||
| 141 | * Parses metatags from DOM |
||
| 142 | * |
||
| 143 | * @param string $url URL |
||
| 144 | * @return array|false |
||
| 145 | */ |
||
| 146 | 1 | public function getDOMData($url = '') { |
|
| 174 | |||
| 175 | /** |
||
| 176 | * Check if URL exists and is reachable by making an HTTP request to retrieve header information |
||
| 177 | * |
||
| 178 | * @param string $url URL of the resource |
||
| 179 | * @return boolean |
||
| 180 | */ |
||
| 181 | 1 | public function exists($url = '') { |
|
| 188 | |||
| 189 | /** |
||
| 190 | * Returns head of the resource |
||
| 191 | * |
||
| 192 | * @param string $url URL of the resource |
||
| 193 | * @return Response|false |
||
| 194 | */ |
||
| 195 | 1 | public function request($url = '') { |
|
| 211 | |||
| 212 | /** |
||
| 213 | * Get contents of the page |
||
| 214 | * |
||
| 215 | * @param string $url URL of the resource |
||
| 216 | * @return string |
||
| 217 | */ |
||
| 218 | 1 | public function read($url = '') { |
|
| 228 | |||
| 229 | /** |
||
| 230 | * Checks if resource is an html page |
||
| 231 | * |
||
| 232 | * @param string $url URL of the resource |
||
| 233 | * @return boolean |
||
| 234 | */ |
||
| 235 | 1 | public function isHTML($url = '') { |
|
| 239 | |||
| 240 | /** |
||
| 241 | * Checks if resource is JSON |
||
| 242 | * |
||
| 243 | * @param string $url URL of the resource |
||
| 244 | * @return boolean |
||
| 245 | */ |
||
| 246 | 1 | public function isJSON($url = '') { |
|
| 250 | |||
| 251 | /** |
||
| 252 | * Checks if resource is XML |
||
| 253 | * |
||
| 254 | * @param string $url URL of the resource |
||
| 255 | * @return boolean |
||
| 256 | */ |
||
| 257 | 1 | public function isXML($url = '') { |
|
| 261 | |||
| 262 | /** |
||
| 263 | * Checks if resource is an image |
||
| 264 | * |
||
| 265 | * @param string $url URL of the resource |
||
| 266 | * @return boolean |
||
| 267 | */ |
||
| 268 | 1 | public function isImage($url = '') { |
|
| 277 | |||
| 278 | /** |
||
| 279 | * Get mime type of the URL content |
||
| 280 | * |
||
| 281 | * @param string $url URL of the resource |
||
| 282 | * @return string |
||
| 283 | */ |
||
| 284 | 1 | public function getContentType($url = '') { |
|
| 295 | |||
| 296 | /** |
||
| 297 | * Returns HTML contents of the page |
||
| 298 | * |
||
| 299 | * @param string $url URL of the resource |
||
| 300 | * @return string |
||
| 301 | */ |
||
| 302 | 1 | public function getHTML($url = '') { |
|
| 308 | |||
| 309 | /** |
||
| 310 | * Returns HTML contents of the page as a DOMDocument |
||
| 311 | * |
||
| 312 | * @param string $url URL of the resource |
||
| 313 | * @return DOMDocument|false |
||
| 314 | */ |
||
| 315 | 1 | public function getDOM($url = '') { |
|
| 331 | |||
| 332 | /** |
||
| 333 | * Parses document title |
||
| 334 | * |
||
| 335 | * @param DOMDocument $doc Document |
||
| 336 | * @return string |
||
| 337 | */ |
||
| 338 | 1 | public function parseTitle(DOMDocument $doc) { |
|
| 343 | |||
| 344 | /** |
||
| 345 | * Parses <link> tags |
||
| 346 | * |
||
| 347 | * @param DOMDocument $doc Document |
||
| 348 | * @return array |
||
| 349 | */ |
||
| 350 | 1 | public function parseLinkTags(DOMDocument $doc) { |
|
| 385 | |||
| 386 | /** |
||
| 387 | * Parses <meta> tags |
||
| 388 | * |
||
| 389 | * @param DOMDocument $doc Document |
||
| 390 | * @return array |
||
| 391 | */ |
||
| 392 | 1 | public function parseMetaTags(DOMDocument $doc) { |
|
| 468 | |||
| 469 | /** |
||
| 470 | * Parses <img> tags |
||
| 471 | * |
||
| 472 | * @param DOMDocument $doc Document |
||
| 473 | * @return array |
||
| 474 | */ |
||
| 475 | 1 | public function parseImgTags(DOMDocument $doc) { |
|
| 487 | |||
| 488 | /** |
||
| 489 | * Normalizes relative URLs |
||
| 490 | * |
||
| 491 | * @param DOMDocument $doc Document |
||
| 492 | * @param string $href URL to normalize |
||
| 493 | * @return string|false |
||
| 494 | */ |
||
| 495 | 1 | public function getAbsoluteURL(DOMDocument $doc, $href = '') { |
|
| 519 | |||
| 520 | } |
||
| 521 |
As per the PSR-2 coding standard, there must not be a space in front of the colon in case statements.
To learn more about the PSR-2 coding standard, please refer to the PHP-FIG (PHP Framework Interop Group).