Complex classes like UserAgentClient often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UserAgentClient and, based on these observations, to apply Extract Interface as well.
| 1 | <?php |
||
| 14 | class UserAgentClient implements RobotsTxtInterface |
||
| 15 | { |
||
| 16 | use UrlParser; |
||
| 17 | |||
| 18 | /** |
||
| 19 | * Rules |
||
| 20 | * @var array |
||
| 21 | */ |
||
| 22 | protected $rules; |
||
| 23 | |||
| 24 | /** |
||
| 25 | * User-agent |
||
| 26 | * @var string |
||
| 27 | */ |
||
| 28 | protected $userAgent; |
||
| 29 | |||
| 30 | /** |
||
| 31 | * Robots.txt base URL |
||
| 32 | * @var string |
||
| 33 | */ |
||
| 34 | protected $base; |
||
| 35 | |||
| 36 | /** |
||
| 37 | * Status code parser |
||
| 38 | * @var StatusCodeParser |
||
| 39 | */ |
||
| 40 | protected $statusCodeParser; |
||
| 41 | |||
| 42 | /** |
||
| 43 | * Comment export status |
||
| 44 | * @var bool |
||
| 45 | */ |
||
| 46 | protected $commentsExported = false; |
||
| 47 | |||
| 48 | /** |
||
| 49 | * UserAgentClient constructor. |
||
| 50 | * |
||
| 51 | * @param array $rules |
||
| 52 | * @param string $userAgent |
||
| 53 | * @param string $baseUrl |
||
| 54 | * @param int|null $statusCode |
||
| 55 | */ |
||
| 56 | public function __construct(array $rules, $userAgent, $baseUrl, $statusCode) |
||
| 63 | |||
| 64 | /** |
||
| 65 | * Check if URL is allowed to crawl |
||
| 66 | * |
||
| 67 | * @param string $url |
||
| 68 | * @return bool |
||
| 69 | */ |
||
| 70 | public function isAllowed($url) |
||
| 74 | |||
| 75 | /** |
||
| 76 | * Check |
||
| 77 | * |
||
| 78 | * @param string $directive |
||
| 79 | * @param string $url - URL to check |
||
| 80 | * @return bool |
||
| 81 | * @throws ClientException |
||
| 82 | */ |
||
| 83 | protected function check($directive, $url) |
||
| 101 | |||
| 102 | /** |
||
| 103 | * Check if the URL belongs to current robots.txt |
||
| 104 | * |
||
| 105 | * @param $urls |
||
| 106 | * @return bool |
||
| 107 | */ |
||
| 108 | protected function isUrlApplicable($urls) |
||
| 122 | |||
| 123 | /** |
||
| 124 | * Check if URL is disallowed to crawl |
||
| 125 | * |
||
| 126 | * @param string $url |
||
| 127 | * @return bool |
||
| 128 | */ |
||
| 129 | public function isDisallowed($url) |
||
| 133 | |||
| 134 | /** |
||
| 135 | * Get Cache-delay |
||
| 136 | * |
||
| 137 | * @return float|int |
||
| 138 | */ |
||
| 139 | public function getCacheDelay() |
||
| 144 | |||
| 145 | /** |
||
| 146 | * Get Crawl-delay |
||
| 147 | * |
||
| 148 | * @return float|int |
||
| 149 | */ |
||
| 150 | public function getCrawlDelay() |
||
| 155 | |||
| 156 | /** |
||
| 157 | * Get Request-rate for current timestamp |
||
| 158 | * |
||
| 159 | * @param int|null $timestamp |
||
| 160 | * @return float|int |
||
| 161 | */ |
||
| 162 | protected function getRequestRate($timestamp = null) |
||
| 176 | |||
| 177 | /** |
||
| 178 | * Determine Request rates |
||
| 179 | * |
||
| 180 | * @param $timestamp |
||
| 181 | * @return array |
||
| 182 | */ |
||
| 183 | protected function determineRequestRates($timestamp) |
||
| 209 | |||
| 210 | /** |
||
| 211 | * Get Request-rates |
||
| 212 | * |
||
| 213 | * @return array |
||
| 214 | */ |
||
| 215 | public function getRequestRates() |
||
| 220 | |||
| 221 | /** |
||
| 222 | * Rule export |
||
| 223 | * |
||
| 224 | * @return array |
||
| 225 | */ |
||
| 226 | public function export() |
||
| 236 | |||
| 237 | /** |
||
| 238 | * UserAgentClient destructor. |
||
| 239 | */ |
||
| 240 | public function __destruct() |
||
| 249 | |||
| 250 | /** |
||
| 251 | * Get Comments |
||
| 252 | * |
||
| 253 | * @return array |
||
| 254 | */ |
||
| 255 | public function getComments() |
||
| 261 | |||
| 262 | /** |
||
| 263 | * Get Visit-time |
||
| 264 | * |
||
| 265 | * @return array|false |
||
| 266 | */ |
||
| 267 | public function getVisitTime() |
||
| 272 | } |
||
| 273 |