Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like ArticleInfo often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ArticleInfo, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 16 | class ArticleInfo extends Model |
||
| 17 | { |
||
| 18 | /** @var Container The application's DI container. */ |
||
| 19 | protected $container; |
||
| 20 | |||
| 21 | /** @var Page The page. */ |
||
| 22 | protected $page; |
||
| 23 | |||
| 24 | /** @var int Number of revisions that belong to the page. */ |
||
| 25 | protected $numRevisions; |
||
| 26 | |||
| 27 | /** @var int Maximum number of revisions to process, as configured. */ |
||
| 28 | protected $maxRevisions; |
||
| 29 | |||
| 30 | /** @var int Number of revisions that were actually processed. */ |
||
| 31 | protected $numRevisionsProcessed; |
||
| 32 | |||
| 33 | /** |
||
| 34 | * Various statistics about editors to the page. These are not User objects |
||
| 35 | * so as to preserve memory. |
||
| 36 | * @var mixed[] |
||
| 37 | */ |
||
| 38 | protected $editors; |
||
| 39 | |||
| 40 | /** @var mixed[] The top 10 editors to the page by number of edits. */ |
||
| 41 | protected $topTenEditorsByEdits; |
||
| 42 | |||
| 43 | /** @var mixed[] The top 10 editors to the page by added text. */ |
||
| 44 | protected $topTenEditorsByAdded; |
||
| 45 | |||
| 46 | /** @var int Number of edits made by the top 10 editors. */ |
||
| 47 | protected $topTenCount; |
||
| 48 | |||
| 49 | /** @var mixed[] Various statistics about bots that edited the page. */ |
||
| 50 | protected $bots; |
||
| 51 | |||
| 52 | /** @var int Number of edits made to the page by bots. */ |
||
| 53 | protected $botRevisionCount; |
||
| 54 | |||
| 55 | /** @var mixed[] Various counts about each individual year and month of the page's history. */ |
||
| 56 | protected $yearMonthCounts; |
||
| 57 | |||
| 58 | /** @var Edit The first edit to the page. */ |
||
| 59 | protected $firstEdit; |
||
| 60 | |||
| 61 | /** @var Edit The last edit to the page. */ |
||
| 62 | protected $lastEdit; |
||
| 63 | |||
| 64 | /** @var Edit Edit that made the largest addition by number of bytes. */ |
||
| 65 | protected $maxAddition; |
||
| 66 | |||
| 67 | /** @var Edit Edit that made the largest deletion by number of bytes. */ |
||
| 68 | protected $maxDeletion; |
||
| 69 | |||
| 70 | /** @var int[] Number of in and outgoing links and redirects to the page. */ |
||
| 71 | protected $linksAndRedirects; |
||
| 72 | |||
| 73 | /** @var string[] Assessments of the page (see Page::getAssessments). */ |
||
| 74 | protected $assessments; |
||
| 75 | |||
| 76 | /** |
||
| 77 | * Maximum number of edits that were created across all months. This is used as a comparison |
||
| 78 | * for the bar charts in the months section. |
||
| 79 | * @var int |
||
| 80 | */ |
||
| 81 | protected $maxEditsPerMonth; |
||
| 82 | |||
| 83 | /** @var string[] List of (semi-)automated tools that were used to edit the page. */ |
||
| 84 | protected $tools; |
||
| 85 | |||
| 86 | /** |
||
| 87 | * Total number of bytes added throughout the page's history. This is used as a comparison |
||
| 88 | * when computing the top 10 editors by added text. |
||
| 89 | * @var int |
||
| 90 | */ |
||
| 91 | protected $addedBytes = 0; |
||
| 92 | |||
| 93 | /** @var int Number of days between first and last edit. */ |
||
| 94 | protected $totalDays; |
||
| 95 | |||
| 96 | /** @var int Number of minor edits to the page. */ |
||
| 97 | protected $minorCount = 0; |
||
| 98 | |||
| 99 | /** @var int Number of anonymous edits to the page. */ |
||
| 100 | protected $anonCount = 0; |
||
| 101 | |||
| 102 | /** @var int Number of automated edits to the page. */ |
||
| 103 | protected $automatedCount = 0; |
||
| 104 | |||
| 105 | /** @var int Number of edits to the page that were reverted with the subsequent edit. */ |
||
| 106 | protected $revertCount = 0; |
||
| 107 | |||
| 108 | /** @var int[] The "edits per <time>" counts. */ |
||
| 109 | protected $countHistory = [ |
||
| 110 | 'day' => 0, |
||
| 111 | 'week' => 0, |
||
| 112 | 'month' => 0, |
||
| 113 | 'year' => 0 |
||
| 114 | ]; |
||
| 115 | |||
| 116 | /** @var string[] List of wikidata and Checkwiki errors. */ |
||
| 117 | protected $bugs; |
||
| 118 | |||
| 119 | /** |
||
| 120 | * ArticleInfo constructor. |
||
| 121 | * @param Page $page The page to process. |
||
| 122 | * @param Container $container The DI container. |
||
| 123 | */ |
||
| 124 | 9 | public function __construct(Page $page, Container $container) |
|
| 129 | |||
| 130 | /** |
||
| 131 | * Shorthand to get the page's project. |
||
| 132 | * @return Project |
||
| 133 | * @codeCoverageIgnore |
||
| 134 | */ |
||
| 135 | public function getProject() |
||
| 139 | |||
| 140 | /** |
||
| 141 | * Get the number of revisions belonging to the page. |
||
| 142 | * @return int |
||
| 143 | */ |
||
| 144 | 4 | public function getNumRevisions() |
|
| 151 | |||
| 152 | /** |
||
| 153 | * Get the maximum number of revisions that we should process. |
||
| 154 | * @return int |
||
| 155 | */ |
||
| 156 | 3 | public function getMaxRevisions() |
|
| 163 | |||
| 164 | /** |
||
| 165 | * Get the number of revisions that are actually getting processed. |
||
| 166 | * This goes by the app.max_page_revisions parameter, or the actual |
||
| 167 | * number of revisions, whichever is smaller. |
||
| 168 | * @return int |
||
| 169 | */ |
||
| 170 | 5 | public function getNumRevisionsProcessed() |
|
| 184 | |||
| 185 | /** |
||
| 186 | * Are there more revisions than we should process, based on the config? |
||
| 187 | * @return bool |
||
| 188 | */ |
||
| 189 | 3 | public function tooManyRevisions() |
|
| 193 | |||
| 194 | /** |
||
| 195 | * Fetch and store all the data we need to show the ArticleInfo view. |
||
| 196 | * @codeCoverageIgnore |
||
| 197 | */ |
||
| 198 | public function prepareData() |
||
| 204 | |||
| 205 | /** |
||
| 206 | * Get the number of editors that edited the page. |
||
| 207 | * @return int |
||
| 208 | */ |
||
| 209 | 1 | public function getNumEditors() |
|
| 213 | |||
| 214 | /** |
||
| 215 | * Get the number of bots that edited the page. |
||
| 216 | * @return int |
||
| 217 | */ |
||
| 218 | public function getNumBots() |
||
| 222 | |||
| 223 | /** |
||
| 224 | * Get the number of days between the first and last edit. |
||
| 225 | * @return int |
||
| 226 | */ |
||
| 227 | 1 | public function getTotalDays() |
|
| 238 | |||
| 239 | /** |
||
| 240 | * Get the average number of days between edits to the page. |
||
| 241 | * @return double |
||
| 242 | */ |
||
| 243 | 1 | public function averageDaysPerEdit() |
|
| 247 | |||
| 248 | /** |
||
| 249 | * Get the average number of edits per day to the page. |
||
| 250 | * @return double |
||
| 251 | */ |
||
| 252 | 1 | public function editsPerDay() |
|
| 259 | |||
| 260 | /** |
||
| 261 | * Get the average number of edits per month to the page. |
||
| 262 | * @return double |
||
| 263 | */ |
||
| 264 | 1 | View Code Duplication | public function editsPerMonth() |
| 271 | |||
| 272 | /** |
||
| 273 | * Get the average number of edits per year to the page. |
||
| 274 | * @return double |
||
| 275 | */ |
||
| 276 | 1 | View Code Duplication | public function editsPerYear() |
| 283 | |||
| 284 | /** |
||
| 285 | * Get the average number of edits per editor. |
||
| 286 | * @return double |
||
| 287 | */ |
||
| 288 | 1 | public function editsPerEditor() |
|
| 292 | |||
| 293 | /** |
||
| 294 | * Get the percentage of minor edits to the page. |
||
| 295 | * @return double |
||
| 296 | */ |
||
| 297 | 1 | public function minorPercentage() |
|
| 304 | |||
| 305 | /** |
||
| 306 | * Get the percentage of anonymous edits to the page. |
||
| 307 | * @return double |
||
| 308 | */ |
||
| 309 | 1 | public function anonPercentage() |
|
| 316 | |||
| 317 | /** |
||
| 318 | * Get the percentage of edits made by the top 10 editors. |
||
| 319 | * @return double |
||
| 320 | */ |
||
| 321 | 1 | public function topTenPercentage() |
|
| 325 | |||
| 326 | /** |
||
| 327 | * Get the number of times the page has been viewed in the given timeframe. |
||
| 328 | * @param int $latest Last N days. |
||
| 329 | * @return int |
||
| 330 | */ |
||
| 331 | public function getPageviews($latest) |
||
| 335 | |||
| 336 | /** |
||
| 337 | * Get the page assessments of the page. |
||
| 338 | * @see https://www.mediawiki.org/wiki/Extension:PageAssessments |
||
| 339 | * @return string[]|false False if unsupported. |
||
| 340 | * @codeCoverageIgnore |
||
| 341 | */ |
||
| 342 | public function getAssessments() |
||
| 349 | |||
| 350 | /** |
||
| 351 | * Get the number of automated edits made to the page. |
||
| 352 | * @return int |
||
| 353 | */ |
||
| 354 | 1 | public function getAutomatedCount() |
|
| 358 | |||
| 359 | /** |
||
| 360 | * Get the number of edits to the page that were reverted with the subsequent edit. |
||
| 361 | * @return int |
||
| 362 | */ |
||
| 363 | 1 | public function getRevertCount() |
|
| 367 | |||
| 368 | /** |
||
| 369 | * Get the number of edits to the page made by logged out users. |
||
| 370 | * @return int |
||
| 371 | */ |
||
| 372 | 1 | public function getAnonCount() |
|
| 376 | |||
| 377 | /** |
||
| 378 | * Get the number of minor edits to the page. |
||
| 379 | * @return int |
||
| 380 | */ |
||
| 381 | 1 | public function getMinorCount() |
|
| 385 | |||
| 386 | /** |
||
| 387 | * Get the number of edits to the page made in the past day, week, month and year. |
||
| 388 | * @return int[] With keys 'day', 'week', 'month' and 'year'. |
||
| 389 | */ |
||
| 390 | public function getCountHistory() |
||
| 394 | |||
| 395 | /** |
||
| 396 | * Get the number of edits to the page made by the top 10 editors. |
||
| 397 | * @return int |
||
| 398 | */ |
||
| 399 | 1 | public function getTopTenCount() |
|
| 403 | |||
| 404 | /** |
||
| 405 | * Get the first edit to the page. |
||
| 406 | * @return Edit |
||
| 407 | */ |
||
| 408 | public function getFirstEdit() |
||
| 412 | |||
| 413 | /** |
||
| 414 | * Get the last edit to the page. |
||
| 415 | * @return Edit |
||
| 416 | */ |
||
| 417 | 1 | public function getLastEdit() |
|
| 421 | |||
| 422 | /** |
||
| 423 | * Get the edit that made the largest addition to the page (by number of bytes). |
||
| 424 | * @return Edit |
||
| 425 | */ |
||
| 426 | 1 | public function getMaxAddition() |
|
| 430 | |||
| 431 | /** |
||
| 432 | * Get the edit that made the largest removal to the page (by number of bytes). |
||
| 433 | * @return Edit |
||
| 434 | */ |
||
| 435 | 1 | public function getMaxDeletion() |
|
| 439 | |||
| 440 | /** |
||
| 441 | * Get the list of editors to the page, including various statistics. |
||
| 442 | * @return mixed[] |
||
| 443 | */ |
||
| 444 | 1 | public function getEditors() |
|
| 448 | |||
| 449 | /** |
||
| 450 | * Get the list of the top editors to the page (by edits), including various statistics. |
||
| 451 | * @return mixed[] |
||
| 452 | */ |
||
| 453 | 1 | public function topTenEditorsByEdits() |
|
| 457 | |||
| 458 | /** |
||
| 459 | * Get the list of the top editors to the page (by added text), including various statistics. |
||
| 460 | * @return mixed[] |
||
| 461 | */ |
||
| 462 | 1 | public function topTenEditorsByAdded() |
|
| 466 | |||
| 467 | /** |
||
| 468 | * Get various counts about each individual year and month of the page's history. |
||
| 469 | * @return mixed[] |
||
| 470 | */ |
||
| 471 | 2 | public function getYearMonthCounts() |
|
| 475 | |||
| 476 | /** |
||
| 477 | * Get the maximum number of edits that were created across all months. This is used as a |
||
| 478 | * comparison for the bar charts in the months section. |
||
| 479 | * @return int |
||
| 480 | */ |
||
| 481 | 1 | public function getMaxEditsPerMonth() |
|
| 485 | |||
| 486 | /** |
||
| 487 | * Get a list of (semi-)automated tools that were used to edit the page, including |
||
| 488 | * the number of times they were used, and a link to the tool's homepage. |
||
| 489 | * @return mixed[] |
||
| 490 | */ |
||
| 491 | 1 | public function getTools() |
|
| 495 | |||
| 496 | /** |
||
| 497 | * Get the list of page's wikidata and Checkwiki errors. |
||
| 498 | * @see Page::getErrors() |
||
| 499 | * @return string[] |
||
| 500 | */ |
||
| 501 | public function getBugs() |
||
| 508 | |||
| 509 | /** |
||
| 510 | * Get the number of wikidata and CheckWiki errors. |
||
| 511 | * @return int |
||
| 512 | */ |
||
| 513 | public function numBugs() |
||
| 517 | |||
| 518 | /** |
||
| 519 | * Get the number of external links on the page. |
||
| 520 | * @return int |
||
| 521 | */ |
||
| 522 | 1 | public function linksExtCount() |
|
| 526 | |||
| 527 | /** |
||
| 528 | * Get the number of incoming links to the page. |
||
| 529 | * @return int |
||
| 530 | */ |
||
| 531 | 1 | public function linksInCount() |
|
| 535 | |||
| 536 | /** |
||
| 537 | * Get the number of outgoing links from the page. |
||
| 538 | * @return int |
||
| 539 | */ |
||
| 540 | 1 | public function linksOutCount() |
|
| 544 | |||
| 545 | /** |
||
| 546 | * Get the number of redirects to the page. |
||
| 547 | * @return int |
||
| 548 | */ |
||
| 549 | 1 | public function redirectsCount() |
|
| 553 | |||
| 554 | /** |
||
| 555 | * Get the number of external, incoming and outgoing links, along with |
||
| 556 | * the number of redirects to the page. |
||
| 557 | * @return int |
||
| 558 | * @codeCoverageIgnore |
||
| 559 | */ |
||
| 560 | private function getLinksAndRedirects() |
||
| 567 | |||
| 568 | /** |
||
| 569 | * Parse the revision history, collecting our core statistics. |
||
| 570 | * @return mixed[] Associative "master" array of metadata about the page. |
||
| 571 | * |
||
| 572 | * Untestable because it relies on getting a PDO statement. All the important |
||
| 573 | * logic lives in other methods which are tested. |
||
| 574 | * @codeCoverageIgnore |
||
| 575 | */ |
||
| 576 | private function parseHistory() |
||
| 632 | |||
| 633 | /** |
||
| 634 | * Update various counts based on the current edit. |
||
| 635 | * @param Edit $edit |
||
| 636 | * @param Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion' |
||
| 637 | * @return Edit[] Updated version of $prevEdits. |
||
| 638 | */ |
||
| 639 | 3 | private function updateCounts(Edit $edit, $prevEdits) |
|
| 666 | |||
| 667 | /** |
||
| 668 | * Update various figures about content sizes based on the given edit. |
||
| 669 | * @param Edit $edit |
||
| 670 | * @param Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion' |
||
| 671 | * @return Edit[] Updated version of $prevEdits. |
||
| 672 | */ |
||
| 673 | 3 | private function updateContentSizes(Edit $edit, $prevEdits) |
|
| 682 | |||
| 683 | /** |
||
| 684 | * Updates the figures on content sizes assuming the given edit was a revert of the previous one. |
||
| 685 | * In such a case, we don't want to treat the previous edit as legit content addition or removal. |
||
| 686 | * @param Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'. |
||
| 687 | * @return Edit[] Updated version of $prevEdits, for tracking. |
||
| 688 | */ |
||
| 689 | 3 | private function updateContentSizesRevert($prevEdits) |
|
| 710 | |||
| 711 | /** |
||
| 712 | * Updates the figures on content sizes assuming the given edit |
||
| 713 | * was NOT a revert of the previous edit. |
||
| 714 | * @param Edit $edit |
||
| 715 | * @param Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'. |
||
| 716 | * @return Edit[] Updated version of $prevEdits, for tracking. |
||
| 717 | */ |
||
| 718 | 3 | private function updateContentSizesNonRevert(Edit $edit, $prevEdits) |
|
| 745 | |||
| 746 | /** |
||
| 747 | * Get the size of the given edit, based on the previous edit (if present). |
||
| 748 | * We also don't return the actual edit size if last revision had a length of null. |
||
| 749 | * This happens when the edit follows other edits that were revision-deleted. |
||
| 750 | * @see T148857 for more information. |
||
| 751 | * @todo Remove once T101631 is resolved. |
||
| 752 | * @param Edit $edit |
||
| 753 | * @param Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'. |
||
| 754 | * @return Edit[] Updated version of $prevEdits, for tracking. |
||
| 755 | */ |
||
| 756 | 3 | private function getEditSize(Edit $edit, $prevEdits) |
|
| 764 | |||
| 765 | /** |
||
| 766 | * Update counts of automated tool usage for the given edit. |
||
| 767 | * @param Edit $edit |
||
| 768 | */ |
||
| 769 | 3 | private function updateToolCounts(Edit $edit) |
|
| 794 | |||
| 795 | /** |
||
| 796 | * Update various counts for the year and month of the given edit. |
||
| 797 | * @param Edit $edit |
||
| 798 | */ |
||
| 799 | 3 | private function updateYearMonthCounts(Edit $edit) |
|
| 821 | |||
| 822 | /** |
||
| 823 | * Add a new entry to $this->yearMonthCounts for the given year, |
||
| 824 | * with blank values for each month. This is called during self::parseHistory(). |
||
| 825 | * @param Edit $edit |
||
| 826 | */ |
||
| 827 | 3 | private function addYearMonthCountEntry(Edit $edit) |
|
| 860 | |||
| 861 | /** |
||
| 862 | * Update the counts of anon and minor edits for year, month, |
||
| 863 | * and user of the given edit. |
||
| 864 | * @param Edit $edit |
||
| 865 | */ |
||
| 866 | 3 | private function updateAnonMinorCounts(Edit $edit) |
|
| 885 | |||
| 886 | /** |
||
| 887 | * Update various counts for the user of the given edit. |
||
| 888 | * @param Edit $edit |
||
| 889 | */ |
||
| 890 | 3 | private function updateUserCounts(Edit $edit) |
|
| 922 | |||
| 923 | /** |
||
| 924 | * Increment "edits per <time>" counts based on the given edit. |
||
| 925 | * @param Edit $edit |
||
| 926 | */ |
||
| 927 | 3 | private function updateCountHistory(Edit $edit) |
|
| 944 | |||
| 945 | /** |
||
| 946 | * Get info about bots that edited the page. |
||
| 947 | * @return mixed[] Contains the bot's username, edit count to the page, |
||
| 948 | * and whether or not they are currently a bot. |
||
| 949 | */ |
||
| 950 | public function getBots() |
||
| 974 | |||
| 975 | /** |
||
| 976 | * Number of edits made to the page by current or former bots. |
||
| 977 | * @param string[] $bots Used only in unit tests, where we |
||
| 978 | * supply mock data for the bots that will get processed. |
||
| 979 | * @return int |
||
| 980 | */ |
||
| 981 | 1 | public function getBotRevisionCount($bots = null) |
|
| 1000 | |||
| 1001 | /** |
||
| 1002 | * Query for log events during each year of the article's history, |
||
| 1003 | * and set the results in $this->yearMonthCounts. |
||
| 1004 | */ |
||
| 1005 | 1 | private function setLogsEvents() |
|
| 1045 | |||
| 1046 | /** |
||
| 1047 | * Set statistics about the top 10 editors by added text and number of edits. |
||
| 1048 | * This is run *after* parseHistory() since we need the grand totals first. |
||
| 1049 | * Various stats are also set for each editor in $this->editors to be used in the charts. |
||
| 1050 | * @return integer Number of edits |
||
| 1051 | */ |
||
| 1052 | 3 | private function setTopTenCounts() |
|
| 1121 | } |
||
| 1122 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.