Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like UserRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use UserRepository, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 17 | class UserRepository extends Repository |
||
| 18 | { |
||
| 19 | |||
| 20 | /** |
||
| 21 | * Convenience method to get a new User object. |
||
| 22 | * @param string $username The username. |
||
| 23 | * @param Container $container The DI container. |
||
| 24 | * @return User |
||
| 25 | */ |
||
| 26 | public static function getUser($username, Container $container) |
||
| 34 | |||
| 35 | /** |
||
| 36 | * Get the user's ID. |
||
| 37 | * @param string $databaseName The database to query. |
||
| 38 | * @param string $username The username to find. |
||
| 39 | * @return int |
||
| 40 | */ |
||
| 41 | View Code Duplication | public function getId($databaseName, $username) |
|
| 64 | |||
| 65 | /** |
||
| 66 | * Get the user's registration date. |
||
| 67 | * @param string $databaseName The database to query. |
||
| 68 | * @param string $username The username to find. |
||
| 69 | * @return string|null As returned by the database. |
||
| 70 | */ |
||
| 71 | View Code Duplication | public function getRegistrationDate($databaseName, $username) |
|
| 94 | |||
| 95 | /** |
||
| 96 | * Get group names of the given user. |
||
| 97 | * @param Project $project The project. |
||
| 98 | * @param string $username The username. |
||
| 99 | * @return string[] |
||
| 100 | */ |
||
| 101 | public function getGroups(Project $project, $username) |
||
| 128 | |||
| 129 | /** |
||
| 130 | * Get a user's global group membership (starting at XTools' default project if none is |
||
| 131 | * provided). This requires the CentralAuth extension to be installed. |
||
| 132 | * @link https://www.mediawiki.org/wiki/Extension:CentralAuth |
||
| 133 | * @param string $username The username. |
||
| 134 | * @param Project $project The project to query. |
||
| 135 | * @return string[] |
||
| 136 | */ |
||
| 137 | public function getGlobalGroups($username, Project $project = null) |
||
| 157 | |||
| 158 | /** |
||
| 159 | * Search the ipblocks table to see if the user is currently blocked |
||
| 160 | * and return the expiry if they are |
||
| 161 | * @param $databaseName The database to query. |
||
| 162 | * @param $userid The ID of the user to search for. |
||
| 163 | * @return bool|string Expiry of active block or false |
||
| 164 | */ |
||
| 165 | public function getBlockExpiry($databaseName, $userid) |
||
| 177 | |||
| 178 | /** |
||
| 179 | * Get pages created by a user |
||
| 180 | * @param Project $project |
||
| 181 | * @param User $user |
||
| 182 | * @param string|int $namespace Namespace ID or 'all' |
||
| 183 | * @param string $redirects One of 'noredirects', 'onlyredirects' or blank for both |
||
| 184 | * @return string[] Result of query, see below. Includes live and deleted pages. |
||
| 185 | */ |
||
| 186 | public function getPagesCreated(Project $project, User $user, $namespace, $redirects) |
||
| 285 | |||
| 286 | /** |
||
| 287 | * Get edit count within given timeframe and namespace |
||
| 288 | * @param Project $project |
||
| 289 | * @param User $user |
||
| 290 | * @param int|string [$namespace] Namespace ID or 'all' for all namespaces |
||
| 291 | * @param string [$start] Start date in a format accepted by strtotime() |
||
| 292 | * @param string [$end] End date in a format accepted by strtotime() |
||
| 293 | */ |
||
| 294 | public function countEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = '') |
||
| 358 | |||
/**
 * Get the number of edits this user made using semi-automated tools.
 *
 * An edit counts as automated when its summary matches the combined tool
 * regex and/or it carries one of the tool change tags (whichever of the two
 * getToolRegexAndTags() provides). Results are cached for 10 minutes.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'
 * @param string $start Start date in a format accepted by strtotime()
 * @param string $end End date in a format accepted by strtotime()
 * @return int Number of distinct matching revisions.
 */
public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = '')
{
    $cacheKey = 'autoeditcount.' . $project->getDatabaseName() . '.'
        . $user->getCacheKey() . '.' . $namespace;

    $condBegin = '';
    $condEnd = '';

    if (!empty($start)) {
        // The cache key uses the raw value; the query uses the normalised one.
        $cacheKey .= '.' . $start;
        $start = date('Ymd000000', strtotime($start));
        $condBegin = 'AND rev_timestamp >= :start ';
    }
    if (!empty($end)) {
        $cacheKey .= '.' . $end;
        $end = date('Ymd235959', strtotime($end));
        $condEnd = 'AND rev_timestamp <= :end ';
    }

    if ($this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }
    $this->stopwatch->start($cacheKey, 'XTools');

    // Get the combined regex and tags for the tools
    $conn = $this->getProjectsConnection();
    list($regex, $tags) = $this->getToolRegexAndTags($project->getDomain(), $conn);

    $pageTable = $this->getTableName($project->getDatabaseName(), 'page');
    $revisionTable = $this->getTableName($project->getDatabaseName(), 'revision');
    $tagTable = $this->getTableName($project->getDatabaseName(), 'change_tag');
    $condNamespace = $namespace === 'all' ? '' : 'AND page_namespace = :namespace';
    $pageJoin = $namespace === 'all' ? '' : "JOIN $pageTable ON page_id = rev_page";
    $tagJoin = '';

    // Build SQL for detecting autoedits via regex and/or tags
    $condTools = [];
    if ($regex != '') {
        $condTools[] = "rev_comment REGEXP $regex";
    }
    if ($tags != '') {
        $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
        $condTools[] = "ct_tag IN ($tags)";
    }

    // With neither a regex nor tags there is nothing that could identify an
    // automated edit, and 'AND ()' would be invalid SQL, so skip the query.
    if (empty($condTools)) {
        $this->stopwatch->stop($cacheKey);
        return 0;
    }
    $condTool = 'AND (' . implode(' OR ', $condTools) . ')';

    // DISTINCT because the change_tag join yields one row per tag of a revision.
    $sql = "SELECT COUNT(DISTINCT(rev_id))
            FROM $revisionTable
            $pageJoin
            $tagJoin
            WHERE rev_user_text = :username
            $condTool
            $condNamespace
            $condBegin
            $condEnd";

    $username = $user->getUsername();
    $resultQuery = $conn->prepare($sql);
    $resultQuery->bindParam('username', $username);
    if (!empty($start)) {
        $resultQuery->bindParam('start', $start);
    }
    if (!empty($end)) {
        $resultQuery->bindParam('end', $end);
    }
    if ($namespace !== 'all') {
        $resultQuery->bindParam('namespace', $namespace);
    }
    $resultQuery->execute();
    $result = (int) $resultQuery->fetchColumn();

    // Cache for 10 minutes, and return.
    $cacheItem = $this->cache->getItem($cacheKey)
        ->set($result)
        ->expiresAfter(new DateInterval('PT10M'));
    $this->cache->save($cacheItem);
    $this->stopwatch->stop($cacheKey);

    return $result;
}
||
| 452 | |||
/**
 * Get non-automated contributions for the given user.
 *
 * Returns one page of up to 50 revisions, newest first, excluding edits whose
 * summary matches the combined tool regex or that carry a tool change tag.
 * Results are cached for 10 minutes.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'
 * @param string $start Start date in a format accepted by strtotime()
 * @param string $end End date in a format accepted by strtotime()
 * @param int $offset Used for pagination, offset results by N edits
 * @return array Result of query (as returned by fetchAll()), with columns
 *   'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
 *   'length', 'length_change', 'comment'
 */
public function getNonAutomatedEdits(
    Project $project,
    User $user,
    $namespace = 'all',
    $start = '',
    $end = '',
    $offset = 0
) {
    // The offset is interpolated into the SQL below (it is not a bound
    // parameter), so force it to a non-negative integer first.
    $offset = max(0, (int) $offset);

    $cacheKey = 'nonautoedits.' . $project->getDatabaseName() . '.'
        . $user->getCacheKey() . '.' . $namespace . '.' . $offset;

    $condBegin = '';
    $condEnd = '';

    if (!empty($start)) {
        // The cache key uses the raw value; the query uses the normalised one.
        $cacheKey .= '.' . $start;
        $start = date('Ymd000000', strtotime($start));
        $condBegin = 'AND revs.rev_timestamp >= :start ';
    }
    if (!empty($end)) {
        $cacheKey .= '.' . $end;
        $end = date('Ymd235959', strtotime($end));
        $condEnd = 'AND revs.rev_timestamp <= :end ';
    }

    if ($this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }
    $this->stopwatch->start($cacheKey, 'XTools');

    // Get the combined regex and tags for the tools
    $conn = $this->getProjectsConnection();
    list($regex, $tags) = $this->getToolRegexAndTags($project->getDomain(), $conn);

    $pageTable = $this->getTableName($project->getDatabaseName(), 'page');
    $revisionTable = $this->getTableName($project->getDatabaseName(), 'revision');
    $tagTable = $this->getTableName($project->getDatabaseName(), 'change_tag');
    $condNamespace = $namespace === 'all' ? '' : 'AND page_namespace = :namespace';

    // Only emit the regex filter when a regex exists; an empty value would
    // leave a dangling "NOT RLIKE" and break the query.
    $condRegex = $regex != '' ? "AND revs.rev_comment NOT RLIKE $regex" : '';

    // NOTE(review): a revision carrying both a tool tag and an unrelated tag
    // still yields a row for the unrelated tag — confirm whether a NOT EXISTS
    // subquery is wanted here instead.
    $tagJoin = $tags != '' ? "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)" : '';
    $condTag = $tags != '' ? "AND (ct_tag NOT IN ($tags) OR ct_tag IS NULL)" : '';

    $sql = "SELECT
                page_title,
                page_namespace,
                revs.rev_id AS rev_id,
                revs.rev_timestamp AS timestamp,
                revs.rev_minor_edit AS minor,
                revs.rev_len AS length,
                (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                revs.rev_comment AS comment
            FROM $pageTable
            JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
            LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
            $tagJoin
            WHERE revs.rev_user_text = :username
            AND revs.rev_timestamp > 0
            $condRegex
            $condTag
            $condBegin
            $condEnd
            $condNamespace
            ORDER BY revs.rev_timestamp DESC
            LIMIT 50
            OFFSET $offset";

    $username = $user->getUsername();
    $resultQuery = $conn->prepare($sql);
    $resultQuery->bindParam('username', $username);
    if (!empty($start)) {
        $resultQuery->bindParam('start', $start);
    }
    if (!empty($end)) {
        $resultQuery->bindParam('end', $end);
    }
    if ($namespace !== 'all') {
        $resultQuery->bindParam('namespace', $namespace);
    }
    $resultQuery->execute();
    $result = $resultQuery->fetchAll();

    // Cache for 10 minutes, and return.
    $cacheItem = $this->cache->getItem($cacheKey)
        ->set($result)
        ->expiresAfter(new DateInterval('PT10M'));
    $this->cache->save($cacheItem);
    $this->stopwatch->stop($cacheKey);

    return $result;
}
||
| 557 | |||
/**
 * Get the number of edits the given user made with each semi-automated tool.
 *
 * One counting query is built per tool known to the 'app.automated_edits_helper'
 * service for this project's domain; the queries are combined with UNION and
 * run as a single statement. Results are cached for 10 minutes.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'
 * @param string $start Start date in a format accepted by strtotime()
 * @param string $end End date in a format accepted by strtotime()
 * @return array Each tool that they used along with the count and link,
 *   sorted by count in descending order:
 *                  [
 *                      'Twinkle' => [
 *                          'count' => 50,
 *                          'link' => 'Wikipedia:Twinkle',
 *                      ],
 *                  ]
 * @throws Exception If a tool definition has neither a 'regex' nor a 'tag'.
 */
public function getAutomatedCounts(
    Project $project,
    User $user,
    $namespace = 'all',
    $start = '',
    $end = ''
) {
    $cacheKey = 'autotoolcounts.' . $project->getDatabaseName() . '.'
        . $user->getCacheKey() . '.' . $namespace;

    $condBegin = '';
    $condEnd = '';

    if (!empty($start)) {
        // The cache key uses the raw value; the query uses the normalised one.
        $cacheKey .= '.' . $start;
        $start = date('Ymd000000', strtotime($start));
        $condBegin = 'AND rev_timestamp >= :start ';
    }
    if (!empty($end)) {
        $cacheKey .= '.' . $end;
        $end = date('Ymd235959', strtotime($end));
        $condEnd = 'AND rev_timestamp <= :end ';
    }

    if ($this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }
    $this->stopwatch->start($cacheKey, 'XTools');

    $conn = $this->getProjectsConnection();

    // Load the semi-automated edit types.
    $automatedEditsHelper = $this->container->get('app.automated_edits_helper');
    $tools = $automatedEditsHelper->getTools($project->getDomain());

    // Without any tool definitions there is no query to build; preparing an
    // empty SQL string would fail.
    if (empty($tools)) {
        $this->stopwatch->stop($cacheKey);
        return [];
    }

    $revisionTable = $project->getRepository()->getTableName($project->getDatabaseName(), 'revision');
    $pageTable = $project->getRepository()->getTableName($project->getDatabaseName(), 'page');
    $tagTable = $project->getRepository()->getTableName($project->getDatabaseName(), 'change_tag');

    $pageJoin = $namespace !== 'all' ? "LEFT JOIN $pageTable ON rev_page = page_id" : '';
    $condNamespace = $namespace !== 'all' ? "AND page_namespace = :namespace" : '';

    // Create a collection of queries that we're going to run.
    $queries = [];

    foreach ($tools as $toolname => $values) {
        $tagJoin = '';
        $condTool = '';
        $quotedName = $conn->quote($toolname, \PDO::PARAM_STR);

        if (isset($values['regex'])) {
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
            $condTool = "rev_comment REGEXP $regex";
        }
        if (isset($values['tag'])) {
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
            $tag = $conn->quote($values['tag'], \PDO::PARAM_STR);

            // Append to regex clause if already present.
            // Tags are more reliable but may not be present for edits made with
            // older versions of the tool, before it started adding tags.
            if ($condTool === '') {
                $condTool = "ct_tag = $tag";
            } else {
                $condTool = '(' . $condTool . " OR ct_tag = $tag)";
            }
        }

        // Developer error, no regex or tag provided for this tool.
        if ($condTool === '') {
            throw new Exception("No regex or tag found for the tool $quotedName. " .
                "Please verify this entry in semi_automated.yml");
        }

        // DISTINCT because the change_tag join yields one row per tag, so a
        // regex-matched revision with several tags would be counted repeatedly.
        $queries[] = "
            SELECT $quotedName AS toolname, COUNT(DISTINCT(rev_id)) AS count
            FROM $revisionTable
            $pageJoin
            $tagJoin
            WHERE rev_user_text = :username
            AND $condTool
            $condNamespace
            $condBegin
            $condEnd";
    }

    // Create a big query and execute.
    $sql = implode(' UNION ', $queries);

    $resultQuery = $conn->prepare($sql);

    $username = $user->getUsername(); // use normalized user name
    $resultQuery->bindParam('username', $username);
    // $start and $end were already normalised to timestamps above.
    if (!empty($start)) {
        $resultQuery->bindParam('start', $start);
    }
    if (!empty($end)) {
        $resultQuery->bindParam('end', $end);
    }
    if ($namespace !== 'all') {
        $resultQuery->bindParam('namespace', $namespace);
    }

    $resultQuery->execute();

    // handling results
    $results = [];

    while ($row = $resultQuery->fetch()) {
        // Only track tools that they've used at least once
        $tool = $row['toolname'];
        if ($row['count'] > 0) {
            $results[$tool] = [
                'link' => $tools[$tool]['link'],
                'count' => $row['count'],
            ];
        }
    }

    // Sort the array by count, highest first
    uasort($results, function ($a, $b) {
        return $b['count'] - $a['count'];
    });

    // Cache for 10 minutes, and return.
    $cacheItem = $this->cache->getItem($cacheKey)
        ->set($results)
        ->expiresAfter(new DateInterval('PT10M'));
    $this->cache->save($cacheItem);
    $this->stopwatch->stop($cacheKey);

    return $results;
}
||
| 712 | |||
| 713 | /** |
||
| 714 | * Get information about the currently-logged in user. |
||
| 715 | * @return array |
||
| 716 | */ |
||
| 717 | public function getXtoolsUserInfo() |
||
| 723 | |||
| 724 | /** |
||
| 725 | * Get the combined regex and tags for all semi-automated tools, |
||
| 726 | * ready to be used in a query. |
||
| 727 | * @param string $projectDomain Such as en.wikipedia.org |
||
| 728 | * @param \Doctrine\DBAL\Connection $conn Used for proper escaping |
||
| 729 | * @return string[] In the format: |
||
| 730 | * ['combined|regex', 'combined,tags'] |
||
| 731 | */ |
||
| 732 | private function getToolRegexAndTags($projectDomain, $conn) |
||
| 752 | } |
||
| 753 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.