Complex classes like TxtParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use TxtParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
11 | class TxtParser |
||
12 | { |
||
13 | /** |
||
14 | * Robots.txt max length in bytes |
||
15 | */ |
||
16 | const DEFAULT_BYTE_LIMIT = 500000; |
||
17 | |||
18 | /** |
||
19 | * Max rule length |
||
20 | */ |
||
21 | const RULE_MAX_LENGTH = 500; |
||
22 | |||
23 | /** |
||
24 | * Directives |
||
25 | */ |
||
26 | const DIRECTIVE_ALLOW = 'allow'; |
||
27 | const DIRECTIVE_CACHE_DELAY = 'cache-delay'; // unofficial |
||
28 | const DIRECTIVE_CLEAN_PARAM = 'clean-param'; // Yandex only |
||
29 | const DIRECTIVE_CRAWL_DELAY = 'crawl-delay'; |
||
30 | const DIRECTIVE_DISALLOW = 'disallow'; |
||
31 | const DIRECTIVE_HOST = 'host'; // Yandex only |
||
32 | const DIRECTIVE_SITEMAP = 'sitemap'; |
||
33 | const DIRECTIVE_USER_AGENT = 'user-agent'; |
||
34 | |||
35 | /** |
||
36 | * Default User-Agent |
||
37 | */ |
||
38 | const FALLBACK_USER_AGENT = '*'; |
||
39 | |||
40 | /** |
||
41 | * RAW robots.txt content |
||
42 | * @var string |
||
43 | */ |
||
44 | private $raw = ''; |
||
45 | |||
46 | /** |
||
47 | * Rule array |
||
48 | * @var array |
||
49 | */ |
||
50 | private $rules = []; |
||
51 | |||
52 | /** |
||
53 | * User-Agents |
||
54 | * @var array |
||
55 | */ |
||
56 | private $userAgents = [self::FALLBACK_USER_AGENT]; |
||
57 | |||
58 | /** |
||
59 | * Current line |
||
60 | * @var string |
||
61 | */ |
||
62 | private $line = ''; |
||
63 | |||
64 | /** |
||
65 | * Previous directive |
||
66 | * @var string |
||
67 | */ |
||
68 | private $previous; |
||
69 | |||
70 | /** |
||
71 | * Current Directive |
||
72 | * @var string |
||
73 | */ |
||
74 | private $directive; |
||
75 | |||
76 | /** |
||
77 | * Current Rule |
||
78 | * @var array|string |
||
79 | */ |
||
80 | private $rule; |
||
81 | |||
82 | /** |
||
83 | * Constructor |
||
84 | * |
||
85 | * @param string $content - file content |
||
86 | * @param string|null $encoding - character encoding |
||
87 | * @param int|null $byteLimit - maximum number of bytes to parse |
||
88 | * @throws TxtParserException |
||
89 | */ |
||
90 | public function __construct($content, $encoding = null, $byteLimit = self::DEFAULT_BYTE_LIMIT) |
||
102 | |||
103 | /** |
||
104 | * Parse robots.txt |
||
105 | * |
||
106 | * @return void |
||
107 | */ |
||
108 | private function parseTxt() |
||
128 | |||
129 | /** |
||
130 | * Generate Directive:Rule pair |
||
131 | * |
||
132 | * @return bool |
||
133 | */ |
||
134 | private function generateRulePair() |
||
151 | |||
152 | /** |
||
153 | * Directives and sub directives |
||
154 | * |
||
155 | * @param string|null $parent |
||
156 | * @return array |
||
157 | */ |
||
158 | private function directives($parent = null) |
||
186 | |||
187 | /** |
||
188 | * Parse line |
||
189 | * |
||
190 | * @param string|null $parent |
||
191 | * @return array|false |
||
192 | */ |
||
193 | private function parseLine($parent = null) |
||
212 | |||
213 | /** |
||
214 | * Add value to directive |
||
215 | * |
||
216 | * @return array|false |
||
217 | */ |
||
218 | private function add() |
||
238 | |||
239 | /** |
||
240 | * Add an Allow or Disallow rule |
||
241 | * |
||
242 | * @return array |
||
243 | */ |
||
244 | private function addDisAllow() |
||
261 | |||
262 | /** |
||
263 | * Add float value |
||
264 | * |
||
265 | * @return array|false |
||
266 | */ |
||
267 | private function addFloat() |
||
276 | |||
277 | /** |
||
278 | * Add Clean-Param record |
||
279 | * |
||
280 | * @return array |
||
281 | */ |
||
282 | private function addCleanParam() |
||
291 | |||
292 | /** |
||
293 | * Explode Clean-Param rule |
||
294 | * |
||
295 | * @param string $rule |
||
296 | * @return array |
||
297 | */ |
||
298 | private function explodeCleanParamRule($rule) |
||
311 | |||
312 | /** |
||
313 | * URL encoder according to RFC 3986 |
||
314 | * Returns a string containing the encoded URL with disallowed characters converted to their percentage encodings. |
||
315 | * @link http://publicmind.in/blog/url-encoding/ |
||
316 | * |
||
317 | * @param string $url |
||
318 | * @return string |
||
319 | */ |
||
320 | private function urlEncode($url) |
||
345 | |||
346 | /** |
||
347 | * Add Host |
||
348 | * |
||
349 | * @return array|false |
||
350 | */ |
||
351 | private function addHost() |
||
371 | |||
372 | /** |
||
373 | * Validate host name |
||
374 | * |
||
375 | * @link http://stackoverflow.com/questions/1755144/how-to-validate-domain-name-in-php |
||
376 | * |
||
377 | * @param string $host |
||
378 | * @return bool |
||
379 | */ |
||
380 | private static function urlValidateHost($host) |
||
389 | |||
390 | /** |
||
391 | * Validate URL scheme |
||
392 | * |
||
393 | * @param string $scheme |
||
394 | * @return bool |
||
395 | */ |
||
396 | private static function urlValidateScheme($scheme) |
||
404 | |||
405 | /** |
||
406 | * Add Sitemap |
||
407 | * |
||
408 | * @return array|false |
||
409 | */ |
||
410 | private function addSitemap() |
||
421 | |||
422 | /** |
||
423 | * Validate URL |
||
424 | * |
||
425 | * @param string $url |
||
426 | * @return bool |
||
427 | */ |
||
428 | public function urlValidate($url) |
||
437 | |||
438 | /** |
||
439 | * Set User-Agent(s) |
||
440 | * |
||
441 | * @return array |
||
442 | */ |
||
443 | private function setUserAgent() |
||
456 | |||
457 | /** |
||
458 | * Assign User-Agent dependent rules to the User-Agent arrays |
||
459 | * |
||
460 | * @return array |
||
461 | */ |
||
462 | private function assignUserAgent() |
||
473 | |||
474 | /** |
||
475 | * Get rules |
||
476 | */ |
||
477 | public function getRules() |
||
481 | } |
||
482 |
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable `$accountId` that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the `id` property of an instance of the `Account` class. This class holds a proper account, so the id value must no longer be false.
Either this assignment is in error or a type check should be added for that assignment.