Total Complexity | 69 |
Total Lines | 658 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like AutoEditsRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AutoEditsRepository, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
23 | class AutoEditsRepository extends UserRepository |
||
24 | { |
||
25 | protected AutomatedEditsHelper $autoEditsHelper; |
||
26 | |||
27 | /** @var array List of automated tools, used for fetching the tool list and filtering it. */ |
||
28 | private array $aeTools; |
||
29 | |||
30 | /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */ |
||
31 | private bool $useSandbox = false; |
||
32 | |||
33 | /** @var array Process cache for tags/IDs. */ |
||
34 | private array $tags; |
||
35 | |||
/**
 * Store the AutomatedEditsHelper on this repository and hand every other
 * dependency, in the same order, to the parent constructor.
 */
public function __construct(
    ManagerRegistry $managerRegistry,
    CacheItemPoolInterface $cache,
    Client $guzzle,
    LoggerInterface $logger,
    ParameterBagInterface $parameterBag,
    bool $isWMF,
    int $queryTimeout,
    ProjectRepository $projectRepo,
    AutomatedEditsHelper $autoEditsHelper
) {
    // Assigned before the parent call, exactly as in the original ordering.
    $this->autoEditsHelper = $autoEditsHelper;

    parent::__construct(
        $managerRegistry, $cache, $guzzle, $logger,
        $parameterBag, $isWMF, $queryTimeout, $projectRepo
    );
}
||
59 | |||
60 | /** |
||
61 | * @param bool $useSandbox |
||
62 | * @return AutoEditsRepository |
||
63 | */ |
||
64 | public function setUseSandbox(bool $useSandbox): AutoEditsRepository |
||
68 | } |
||
69 | |||
/**
 * Fetch the list of semi-automated tools through the AutomatedEditsHelper, optionally
 * narrowed down to the tools that apply to a single namespace.
 *
 * The helper is only consulted the first time; afterwards the list held in
 * $this->aeTools is reused for the lifetime of this repository instance.
 *
 * @param Project $project
 * @param int|string $namespace Namespace ID or 'all'.
 * @return array Tool configurations keyed as returned by the helper. When filtering,
 *   the original keys are preserved (array_filter does not reindex).
 */
public function getTools(Project $project, $namespace = 'all'): array
{
    if (!isset($this->aeTools)) {
        $this->aeTools = $this->autoEditsHelper->getTools($project, $this->useSandbox);
    }

    // No namespace restriction requested: hand back the full list.
    if ('all' === $namespace) {
        return $this->aeTools;
    }

    // A tool applies to the namespace when it declares no namespaces at all, when it
    // lists this namespace, or when the namespace ID is odd and the tool has the
    // 'talk_namespaces' key set.
    $appliesToNamespace = function (array $tool) use ($namespace) {
        if (empty($tool['namespaces'])) {
            return true;
        }

        if (in_array((int)$namespace, $tool['namespaces'])) {
            return true;
        }

        return 1 === $namespace % 2 && isset($tool['talk_namespaces']);
    };

    return array_filter($this->aeTools, $appliesToNamespace);
}
||
96 | |||
/**
 * Return the tools stored under the 'invalid' key of the tool list, and drop
 * that key from $this->aeTools so later consumers no longer see it.
 *
 * @param Project $project
 * @return string[] Labels for the invalid tools; empty when there is no 'invalid' entry.
 */
public function getInvalidTools(Project $project): array
{
    // getTools() returns a copy, so removing the key from the property afterwards
    // does not affect what we read from $allTools.
    $allTools = $this->getTools($project);
    unset($this->aeTools['invalid']);

    return $allTools['invalid'] ?? [];
}
||
109 | |||
/**
 * Overrides Repository::setCache(). When the sandbox configuration is in use the value is
 * returned untouched and the parent (which sets the cache) is never called.
 * @inheritDoc
 */
public function setCache(string $cacheKey, $value, $duration = 'PT20M')
{
    return $this->useSandbox
        ? $value
        : parent::setCache($cacheKey, $value, $duration);
}
||
122 | |||
/**
 * Get the number of edits this user made using semi-automated tools.
 *
 * An edit is counted when its comment matches the combined tool regex and/or it carries
 * one of the tools' tag IDs. If neither a regex nor any tag IDs are available, the count
 * is 0 and no query is run.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return int Number of distinct revisions matching the tool conditions.
 */
public function countAutomatedEdits(
    Project $project,
    User $user,
    $namespace = 'all',
    $start = false,
    $end = false
): int {
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $revDateConditions = $this->getDateConditions($start, $end);

    // Get the combined regex and tags for the tools
    [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);

    [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);

    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    $commentTable = $project->getTableName('comment', 'revision');
    $tagTable = $project->getTableName('change_tag');
    $commentJoin = '';
    $tagJoin = '';

    $params = [];

    // IP range handling.
    $ipcJoin = '';
    $whereClause = 'rev_actor = :actorId';
    if ($user->isIpRange()) {
        $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }

    // Build SQL for detecting AutoEdits via regex and/or tags.
    $condTools = [];
    if ('' != $regex) {
        $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
        $condTools[] = "comment_text REGEXP :tools";
        $params['tools'] = $regex;
    }
    if ('' != $tagIds) {
        $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
        $condTools[] = "ct_tag_id IN ($tagIds)";
    }

    // With nothing to match on, the clause below would render as "AND ()", which is
    // invalid SQL. No detectable tools means no automated edits.
    if ([] === $condTools) {
        return $this->setCache($cacheKey, 0);
    }

    $condTool = 'AND (' . implode(' OR ', $condTools) . ')';

    $sql = "SELECT COUNT(DISTINCT(rev_id))
            FROM $revisionTable
            $ipcJoin
            $pageJoin
            $commentJoin
            $tagJoin
            WHERE $whereClause
            $condNamespace
            $condTool
            $revDateConditions";

    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
    $result = (int)$resultQuery->fetchOne();

    // Cache and return.
    return $this->setCache($cacheKey, $result);
}
||
199 | |||
200 | /** |
||
201 | * Get non-automated contributions for the given user. |
||
202 | * @param Project $project |
||
203 | * @param User $user |
||
204 | * @param string|int $namespace Namespace ID or 'all'. |
||
205 | * @param int|false $start Start date as Unix timestamp. |
||
206 | * @param int|false $end End date as Unix timestamp. |
||
207 | * @param int|false $offset Unix timestamp. Used for pagination. |
||
208 | * @param int $limit Number of results to return. |
||
209 | * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor', |
||
210 | * 'length', 'length_change', 'comment'. |
||
211 | */ |
||
212 | public function getNonAutomatedEdits( |
||
280 | } |
||
281 | |||
/**
 * Get (semi-)automated contributions for the given user, and optionally for a given tool.
 *
 * Rows are ordered by revision timestamp, newest first, and capped at $limit. If neither
 * a regex nor any tag IDs are available for the tool(s), an empty list is returned and
 * no query is run.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config.
 * @param int|false $offset Unix timestamp. Used for pagination.
 * @param int $limit Number of results to return.
 * @return array[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
 *   'length', 'length_change', 'comment'.
 */
public function getAutomatedEdits(
    Project $project,
    User $user,
    $namespace = 'all',
    $start = false,
    $end = false,
    ?string $tool = null,
    $offset = false,
    int $limit = 50
): array {
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.');

    // In this case there is a slight performance improvement we can make if we're not given a start date.
    if ('' === $revDateConditions) {
        $revDateConditions = 'AND revs.rev_timestamp > 0';
    }

    // Get the combined regex and tags for the tools
    [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);

    $pageTable = $project->getTableName('page');
    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    $commentTable = $project->getTableName('comment', 'revision');
    $tagTable = $project->getTableName('change_tag');
    $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
    $tagJoin = '';
    $condsTool = [];
    $params = [];

    if ('' != $regex) {
        $condsTool[] = 'comment_text RLIKE :tools';
        // Bind :tools only when the placeholder is actually part of the query.
        $params['tools'] = $regex;
    }

    if ('' != $tagIds) {
        $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
        $condsTool[] = "ct_tag_id IN ($tagIds)";
    }

    // With nothing to match on, the tool clause would render as "AND ()", which is
    // invalid SQL. No detectable tools means no automated edits.
    if ([] === $condsTool) {
        return $this->setCache($cacheKey, []);
    }

    $condTool = 'AND (' . implode(' OR ', $condsTool) . ')';

    // IP range handling.
    $ipcJoin = '';
    $whereClause = 'revs.rev_actor = :actorId';
    if ($user->isIpRange()) {
        $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }

    $sql = "SELECT
                page_title,
                page_namespace,
                revs.rev_id AS rev_id,
                revs.rev_timestamp AS timestamp,
                revs.rev_minor_edit AS minor,
                revs.rev_len AS length,
                (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                comment_text AS comment
            FROM $pageTable
            JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
            $ipcJoin
            LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
            LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
            $tagJoin
            WHERE $whereClause
            $revDateConditions
            $condNamespace
            $condTool
            GROUP BY revs.rev_id
            ORDER BY revs.rev_timestamp DESC
            LIMIT $limit";

    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
    $result = $resultQuery->fetchAllAssociative();

    // Cache and return.
    return $this->setCache($cacheKey, $result);
}
||
377 | |||
/**
 * Get counts of known automated tools used by the given user.
 *
 * Only tools with a count above zero are included, sorted by count in descending order.
 * When no tool produces a query (empty SQL), an empty list is returned without
 * touching the database.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return array[] Each tool that they used along with the link, label and count:
 *                  [
 *                      'Twinkle' => [
 *                          'link' => 'Wikipedia:Twinkle',
 *                          'label' => 'Twinkle',
 *                          'count' => 50,
 *                      ],
 *                  ]
 */
public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = false, $end = false): array
{
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end);

    // getAutomatedCountsSql() joins per-tool queries with UNION; if every tool was
    // skipped (or there are none) the result is an empty string, which cannot be executed.
    if ('' === $sql) {
        return $this->setCache($cacheKey, []);
    }

    $params = [];
    if ($user->isIpRange()) {
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }
    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);

    $tools = $this->getTools($project, $namespace);

    // handling results
    $results = [];

    while ($row = $resultQuery->fetchAssociative()) {
        // Only track tools that they've used at least once
        $tool = $row['toolname'];
        if ($row['count'] > 0) {
            $results[$tool] = [
                'link' => $tools[$tool]['link'],
                // Fall back to the tool's key when no explicit label is configured.
                'label' => $tools[$tool]['label'] ?? $tool,
                'count' => $row['count'],
            ];
        }
    }

    // Sort the array by count, highest first, keeping the tool names as keys.
    uasort($results, function ($a, $b) {
        return $b['count'] <=> $a['count'];
    });

    // Cache and return.
    return $this->setCache($cacheKey, $results);
}
||
432 | |||
/**
 * Get SQL for getting counts of known automated tools used by the user.
 *
 * One SELECT is built per tool, each returning the quoted tool name as 'toolname' and the
 * number of distinct matching revisions as 'count'; the SELECTs are joined with UNION.
 * Tools for which no regex or tag condition can be built are left out, so the returned
 * string is empty when no tool qualifies.
 *
 * @see self::getToolCounts()
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return string The SQL.
 */
private function getAutomatedCountsSql(
    Project $project,
    User $user,
    $namespace,
    $start = false,
    $end = false
): string {
    $revDateConditions = $this->getDateConditions($start, $end);

    // Load the semi-automated edit types.
    $tools = $this->getTools($project, $namespace);

    // Create a collection of queries that we're going to run.
    $queries = [];

    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
    $conn = $this->getProjectsConnection($project);

    // IP range handling. The :startIp/:endIp values are bound by the caller.
    $ipcJoin = '';
    $whereClause = 'rev_actor = :actorId';
    if ($user->isIpRange()) {
        $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
    }

    foreach ($tools as $toolName => $values) {
        [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);

        // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
        // configuration, but no local tag exists on the said project.
        if ('' === $condTool) {
            continue;
        }

        // The tool name is embedded as a string literal in the SELECT list, so quote it.
        $quotedToolName = $conn->quote($toolName, PDO::PARAM_STR);

        $queries[] = "
            SELECT $quotedToolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
            FROM $revisionTable
            $ipcJoin
            $pageJoin
            $commentJoin
            $tagJoin
            WHERE $whereClause
            AND $condTool
            $condNamespace
            $revDateConditions";
    }

    // Combine to one big query.
    return implode(' UNION ', $queries);
}
||
498 | |||
/**
 * Build the per-tool SQL fragments consumed by self::getAutomatedCountsSql().
 *
 * @param Project $project
 * @param string $toolName
 * @param string[] $values Values as defined in the AutoEdits config.
 * @return string[] [condition clause, comment JOIN clause, tag JOIN clause]. Each element is an
 *   empty string when it does not apply; an empty condition means the tool has neither a
 *   regex nor any resolvable tag IDs.
 */
private function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
{
    $conn = $this->getProjectsConnection($project);
    $condTool = '';
    $commentJoin = '';
    $tagJoin = '';

    // Regex-based detection matches against the revision comment.
    if (isset($values['regex'])) {
        $commentTable = $project->getTableName('comment', 'revision');
        $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
        $quotedRegex = $conn->quote($values['regex'], PDO::PARAM_STR);
        $condTool = "comment_text REGEXP $quotedRegex";
    }

    $tagIds = isset($values['tags'])
        ? $this->getTagIdsFromNames($project, $values['tags'])
        : [];

    // Nothing tag-related to add: return whatever the regex step produced.
    if (!$tagIds) {
        return [$condTool, $commentJoin, $tagJoin];
    }

    $tagTable = $project->getTableName('change_tag');
    $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
    $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds);

    // Use tags in addition to the regex clause, if already present.
    // Tags are more reliable but may not be present for edits made with
    // older versions of the tool, before it started adding tags.
    $condTool = '' === $condTool
        ? $tagClause
        : "($condTool OR $tagClause)";

    return [$condTool, $commentJoin, $tagJoin];
}
||
540 | |||
541 | /** |
||
542 | * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query. |
||
543 | * @param Project $project |
||
544 | * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag. |
||
545 | * @param string|null $tool |
||
546 | * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces. |
||
547 | * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a |
||
548 | * comma-separated list of the tag IDs, ready to be used in SQL. |
||
549 | */ |
||
550 | private function getToolRegexAndTags( |
||
587 | ]; |
||
588 | } |
||
589 | |||
590 | /** |
||
591 | * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above. |
||
592 | * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query. |
||
593 | * @param Project $project |
||
594 | * @return int[] Keys are the tag name, values are the IDs. |
||
595 | */ |
||
596 | public function getTags(Project $project): array |
||
632 | } |
||
633 | |||
/**
 * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
 * For instance, Huggle edits are also tagged as Rollback, but when viewing
 * Rollback edits we don't want to show Huggle edits.
 *
 * Exclusions are only applied when exactly one tag ID is given.
 *
 * @param Project $project
 * @param string $tool Key of the tool in the tool list.
 * @param array $tagIds Tag IDs to match.
 * @return string "ct_tag_id IN (...)" followed by a space and, when applicable, an
 *   "AND ct_tag_id NOT IN (...)" clause.
 */
private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
{
    $tagsList = implode(',', $tagIds);
    $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];
    $excludesSql = '';

    if ($tagExcludes && 1 === count($tagIds)) {
        // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
        $excludedIds = [];
        foreach ($tagExcludes as $tagName) {
            $excludedId = $this->getTags($project)[$tagName] ?? null;
            if ($excludedId) {
                $excludedIds[] = $excludedId;
            }
        }

        $excludesList = implode(',', $excludedIds);
        if ('' !== $excludesList) {
            $excludesSql = "AND ct_tag_id NOT IN ($excludesList)";
        }
    }

    return "ct_tag_id IN ($tagsList) $excludesSql";
}
||
662 | |||
663 | /** |
||
664 | * Get IDs for tags given the names. |
||
665 | * @param Project $project |
||
666 | * @param array $tagNames |
||
667 | * @return array |
||
668 | */ |
||
669 | private function getTagIdsFromNames(Project $project, array $tagNames): array |
||
681 | } |
||
682 | } |
||
683 |