Duplicated code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like ScriptedAgentDetector often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ScriptedAgentDetector and, based on these observations, to apply Extract Interface as well.
1 | <?php |
||
5 | class ScriptedAgentDetector implements DetectorInterface |
||
6 | { |
||
7 | const FUNC_PREFIX = 'checkRobot'; |
||
8 | |||
9 | protected static $userAgentString; |
||
10 | |||
11 | /** |
||
12 | * @var ScriptedAgent |
||
13 | */ |
||
14 | protected static $scriptedAgent; |
||
15 | |||
16 | protected static $robotsList = array( |
||
17 | 'Google', |
||
18 | 'Baidu', |
||
19 | 'Facebook', |
||
20 | 'Bing', |
||
21 | 'Slurp', |
||
22 | 'Twitter', |
||
23 | 'Skype', |
||
24 | 'W3CValidator', |
||
25 | 'wkHTMLtoPDF', |
||
26 | 'Yandex', |
||
27 | 'Apple', |
||
28 | 'Paperli', |
||
29 | 'Ahrefs', |
||
30 | 'MJ12', |
||
31 | 'LiveLap', |
||
32 | 'Webdav', |
||
33 | 'MetaURI', |
||
34 | 'TLSProbe', |
||
35 | 'ScoopIt', |
||
36 | 'Netcraft', |
||
37 | 'Curl', |
||
38 | 'Python', |
||
39 | 'GoLang', |
||
40 | 'Perl', |
||
41 | 'Wget', |
||
42 | 'ZGrab', |
||
43 | 'Java', |
||
44 | 'Shellshock', |
||
45 | 'Browershots', |
||
46 | 'Whois', |
||
47 | 'MageReport', |
||
48 | 'Adbeat', |
||
49 | 'Ubermetrics', |
||
50 | 'Socialrank', |
||
51 | 'GlutenFree', |
||
52 | 'ICQ', |
||
53 | 'Proximic', |
||
54 | 'Verisign' |
||
55 | ); |
||
56 | |||
57 | /** |
||
58 | * Routine to determine the scripted agent type. |
||
59 | * |
||
60 | * @param ScriptedAgent $scriptedAgent |
||
61 | * @param UserAgent $userAgent |
||
62 | * |
||
63 | * @return bool |
||
64 | */ |
||
65 | public static function detect(ScriptedAgent $scriptedAgent, UserAgent $userAgent = null) |
||
87 | |||
88 | /** |
||
89 | * Determine if the browser is wkHTMLtoPDF |
||
90 | * |
||
91 | * @return bool |
||
92 | */ |
||
93 | View Code Duplication | public static function checkRobotwkHTMLtoPDF() |
|
103 | |||
104 | /** |
||
105 | * Determine if the browser is the ICQ preview. |
||
106 | * |
||
107 | * @return bool |
||
108 | */ |
||
109 | public static function checkRobotICQ() |
||
121 | |||
122 | /** |
||
123 | * Determine if the agent is GoogleBot, or a google ads bot. |
||
124 | * |
||
125 | * @return bool |
||
126 | */ |
||
127 | public static function checkRobotGoogle() |
||
159 | |||
160 | /** |
||
161 | * Determine if the agent is the Baidu spider. |
||
162 | * |
||
163 | * @return bool |
||
164 | */ |
||
165 | View Code Duplication | public static function checkRobotBaidu() |
|
175 | |||
176 | /** |
||
177 | * Determine if the agent is the Facebook preview bot. |
||
178 | * |
||
179 | * @return bool |
||
180 | */ |
||
181 | View Code Duplication | public static function checkRobotFacebook() |
|
191 | |||
192 | /** |
||
193 | * Determine if the agent is the bing spider, bing preview bot, or MSN bot |
||
194 | * |
||
195 | * @return bool |
||
196 | */ |
||
197 | public static function checkRobotBing() |
||
225 | |||
226 | /** |
||
227 | * Determine if the agent is the Yahoo Slurp! Spider. |
||
228 | * |
||
229 | * @return bool |
||
230 | * |
||
231 | */ |
||
232 | View Code Duplication | public static function checkRobotSlurp() |
|
242 | |||
243 | /** |
||
244 | * Determine if the agent is the twitter preview bot. |
||
245 | * |
||
246 | * @return bool |
||
247 | */ |
||
248 | View Code Duplication | public static function checkRobotTwitter() |
|
258 | |||
259 | /** |
||
260 | * Determine if the agent is the skype preview bot. |
||
261 | * |
||
262 | * @return bool |
||
263 | */ |
||
264 | View Code Duplication | public static function checkRobotSkype() |
|
274 | |||
275 | /** |
||
276 | * Determine if the agent is the W3C Validator tool. |
||
277 | * |
||
278 | * @return bool |
||
279 | */ |
||
280 | public static function checkRobotW3CValidator() |
||
304 | |||
305 | /** |
||
306 | * Determine if the agent is the Yandex spider. |
||
307 | * |
||
308 | * @return bool |
||
309 | */ |
||
310 | View Code Duplication | public static function checkRobotYandex() |
|
320 | |||
321 | /** |
||
322 | * Determine if the agent is the AppleBot |
||
323 | * |
||
324 | * @return bool |
||
325 | */ |
||
326 | View Code Duplication | public static function checkRobotApple() |
|
336 | |||
337 | /** |
||
338 | * Determine if the agent is the Paper.li bot. |
||
339 | * |
||
340 | * @return bool |
||
341 | */ |
||
342 | View Code Duplication | public static function checkRobotPaperli() |
|
352 | |||
353 | /** |
||
354 | * Determine if the agent is the Ahrefs survey. |
||
355 | * |
||
356 | * @return bool |
||
357 | */ |
||
358 | View Code Duplication | public static function checkRobotAhrefs() |
|
368 | |||
369 | /** |
||
370 | * Determine if the agent is the Majestic 12 spider. |
||
371 | * |
||
372 | * @return bool |
||
373 | */ |
||
374 | View Code Duplication | public static function checkRobotMJ12() |
|
384 | |||
385 | /** |
||
386 | * Determine if the agent is the LiveLap spider. |
||
387 | * |
||
388 | * @return bool |
||
389 | */ |
||
390 | View Code Duplication | public static function checkRobotLiveLap() |
|
400 | |||
401 | /** |
||
402 | * Determine if the agent is a Web Distributed Authoring and Versioning client. Usually unexpected WebDAV requests are hack attempts. |
||
403 | * |
||
404 | * @return bool |
||
405 | */ |
||
406 | View Code Duplication | public static function checkRobotWebdav() |
|
417 | |||
418 | /** |
||
419 | * Determine if the agent is the MetaURI scraper. |
||
420 | * |
||
421 | * @return bool |
||
422 | */ |
||
423 | View Code Duplication | public static function checkRobotMetaURI() |
|
433 | |||
434 | /** |
||
435 | * Determine if the agent is the TLSProbe tool. |
||
436 | * |
||
437 | * @return bool |
||
438 | */ |
||
439 | View Code Duplication | public static function checkRobotTLSProbe() |
|
449 | |||
450 | /** |
||
451 | * Determine if the agent is the scoop.it bots. |
||
452 | * |
||
453 | * @return bool |
||
454 | */ |
||
455 | View Code Duplication | public static function checkRobotScoopIt() |
|
466 | |||
467 | /** |
||
468 | * Determine if the agent is the Netcraft SSL Survey. |
||
469 | * |
||
470 | * @return bool |
||
471 | */ |
||
472 | View Code Duplication | public static function checkRobotNetcraft() |
|
482 | |||
483 | /** |
||
484 | * Determine if the agent is the curl library/cli tool. |
||
485 | * |
||
486 | * @return bool |
||
487 | */ |
||
488 | View Code Duplication | public static function checkRobotCurl() |
|
498 | |||
499 | /** |
||
500 | * Determine if the agent is the python programming language. |
||
501 | * |
||
502 | * @return bool |
||
503 | */ |
||
504 | View Code Duplication | public static function checkRobotPython() |
|
515 | |||
516 | /** |
||
517 | * Determine if the agent is the GoLang programming language. |
||
518 | * |
||
519 | * @return bool |
||
520 | */ |
||
521 | View Code Duplication | public static function checkRobotGoLang() |
|
531 | |||
532 | /** |
||
533 | * Determine if the agent is the perl programming language. |
||
534 | * |
||
535 | * @return bool |
||
536 | */ |
||
537 | View Code Duplication | public static function checkRobotPerl() |
|
547 | |||
548 | /** |
||
549 | * Determine if the agent is the wget tool. |
||
550 | * |
||
551 | * @return bool |
||
552 | */ |
||
553 | View Code Duplication | public static function checkRobotWget() |
|
563 | |||
564 | /** |
||
565 | * Determine if the agent is the zgrab TLS banner tool. |
||
566 | * |
||
567 | * @return bool |
||
568 | */ |
||
569 | View Code Duplication | public static function checkRobotZGrab() |
|
579 | |||
580 | /** |
||
581 | * Determine if the agent is the Java programming language. |
||
582 | * |
||
583 | * @return bool |
||
584 | */ |
||
585 | View Code Duplication | public static function checkRobotJava() |
|
595 | |||
596 | /** |
||
597 | * Determine if the agent is the ShellShock exploit. |
||
598 | * |
||
599 | * @return bool |
||
600 | */ |
||
601 | View Code Duplication | public static function checkRobotShellshock() |
|
611 | |||
612 | /** |
||
613 | * Determine if the agent is the browsershots testing tool. |
||
614 | * |
||
615 | * @return bool |
||
616 | */ |
||
617 | View Code Duplication | public static function checkRobotBrowershots() |
|
627 | |||
628 | /** |
||
629 | * Determine if the agent is the who.is spider. |
||
630 | * |
||
631 | * @return bool |
||
632 | */ |
||
633 | View Code Duplication | public static function checkRobotWhois() |
|
643 | |||
644 | /** |
||
645 | * Determine if the agent is the MageReport exploit survey. |
||
646 | * |
||
647 | * @return bool |
||
648 | */ |
||
649 | View Code Duplication | public static function checkRobotMageReport() |
|
659 | |||
660 | /** |
||
661 | * Determine if the agent is the AdBeat advertising survey. |
||
662 | * |
||
663 | * @return bool |
||
664 | */ |
||
665 | View Code Duplication | public static function checkRobotAdbeat() |
|
675 | |||
676 | /** |
||
677 | * Determine if the agent is the SocialRankIO crawler. |
||
678 | * |
||
679 | * @return bool |
||
680 | */ |
||
681 | View Code Duplication | public static function checkRobotSocialrank() |
|
691 | |||
692 | /** |
||
693 | * Determine if the agent is the Gluten Free crawler. |
||
694 | * |
||
695 | * @return bool |
||
696 | */ |
||
697 | View Code Duplication | public static function checkRobotGlutenFree() |
|
707 | |||
708 | /** |
||
709 | * Determine if the agent is the Proximic spider. |
||
710 | * |
||
711 | * @return bool |
||
712 | */ |
||
713 | View Code Duplication | public static function checkRobotProximic() |
|
723 | |||
724 | /** |
||
725 | * Determine if the agent is the Ubermetrics survey. |
||
726 | * |
||
727 | * @return bool |
||
728 | */ |
||
729 | View Code Duplication | public static function checkRobotUbermetrics() |
|
739 | |||
740 | /** |
||
741 | * Determine if the agent is the Verisign ips-agent. |
||
742 | * |
||
743 | * @return bool |
||
744 | */ |
||
745 | View Code Duplication | public static function checkRobotVerisign() |
|
755 | } |
||
756 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.