Total Complexity | 68 |
Total Lines | 634 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like AutoEditsRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AutoEditsRepository, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
20 | class AutoEditsRepository extends UserRepository |
||
21 | { |
||
22 | /** @var array List of automated tools, used for fetching the tool list and filtering it. */ |
||
23 | private $aeTools; |
||
24 | |||
25 | /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */ |
||
26 | private $useSandbox; |
||
27 | |||
28 | /** @var array Process cache for tags/IDs. */ |
||
29 | private $tags; |
||
30 | |||
31 | /** |
||
32 | * AutoEditsRepository constructor. Used solely to set $useSandbox (from AutomatedEditsController). |
||
33 | * @param bool $useSandbox |
||
34 | */ |
||
35 | public function __construct(bool $useSandbox = false) |
||
36 | { |
||
37 | parent::__construct(); |
||
38 | $this->useSandbox = $useSandbox; |
||
39 | } |
||
40 | |||
41 | /** |
||
42 | * Method to give the repository access to the AutomatedEditsHelper and fetch the list of semi-automated tools. |
||
43 | * @param Project $project |
||
44 | * @param int|string $namespace Namespace ID or 'all'. |
||
45 | * @return array |
||
46 | */ |
||
47 | public function getTools(Project $project, $namespace = 'all'): array |
||
48 | { |
||
49 | if (!isset($this->aeTools)) { |
||
50 | $this->aeTools = $this->container |
||
51 | ->get('app.automated_edits_helper') |
||
52 | ->getTools($project, $this->useSandbox); |
||
53 | } |
||
54 | |||
55 | if ('all' !== $namespace) { |
||
56 | // Limit by namespace. |
||
57 | return array_filter($this->aeTools, function (array $tool) use ($namespace) { |
||
58 | return empty($tool['namespaces']) || |
||
59 | in_array((int)$namespace, $tool['namespaces']) || |
||
60 | ( |
||
61 | 1 === $namespace % 2 && |
||
62 | isset($tool['talk_namespaces']) |
||
63 | ); |
||
64 | }); |
||
65 | } |
||
66 | |||
67 | return $this->aeTools; |
||
68 | } |
||
69 | |||
70 | /** |
||
71 | * Get tools that were misconfigured, also removing them from $this->aeTools. |
||
72 | * @param Project $project |
||
73 | * @return string[] Labels for the invalid tools. |
||
74 | */ |
||
75 | public function getInvalidTools(Project $project): array |
||
76 | { |
||
77 | $tools = $this->getTools($project); |
||
78 | $invalidTools = $tools['invalid'] ?? []; |
||
79 | unset($this->aeTools['invalid']); |
||
80 | return $invalidTools; |
||
81 | } |
||
82 | |||
83 | /** |
||
84 | * Overrides Repository::setCache(), and will not call the parent (which sets the cache) if using the sandbox. |
||
85 | * @inheritDoc |
||
86 | */ |
||
87 | public function setCache(string $cacheKey, $value, $duration = 'PT20M') |
||
88 | { |
||
89 | if ($this->useSandbox) { |
||
90 | return $value; |
||
91 | } |
||
92 | |||
93 | return parent::setCache($cacheKey, $value, $duration); |
||
94 | } |
||
95 | |||
96 | /** |
||
97 | * Get the number of edits this user made using semi-automated tools. |
||
98 | * @param Project $project |
||
99 | * @param User $user |
||
100 | * @param string|int $namespace Namespace ID or 'all' |
||
101 | * @param int|false $start Start date as Unix timestamp. |
||
102 | * @param int|false $end End date as Unix timestamp. |
||
103 | * @return int Result of query, see below. |
||
104 | */ |
||
105 | public function countAutomatedEdits( |
||
106 | Project $project, |
||
107 | User $user, |
||
108 | $namespace = 'all', |
||
109 | $start = false, |
||
110 | $end = false |
||
111 | ): int { |
||
112 | $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount'); |
||
113 | if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) { |
||
114 | return $this->cache->getItem($cacheKey)->get(); |
||
115 | } |
||
116 | |||
117 | $revDateConditions = $this->getDateConditions($start, $end); |
||
118 | |||
119 | // Get the combined regex and tags for the tools |
||
120 | [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace); |
||
121 | |||
122 | [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace); |
||
123 | |||
124 | $revisionTable = $project->getTableName('revision'); |
||
125 | $ipcTable = $project->getTableName('ip_changes'); |
||
126 | $commentTable = $project->getTableName('comment', 'revision'); |
||
127 | $tagTable = $project->getTableName('change_tag'); |
||
128 | $commentJoin = ''; |
||
129 | $tagJoin = ''; |
||
130 | |||
131 | $params = []; |
||
132 | |||
133 | // IP range handling. |
||
134 | $ipcJoin = ''; |
||
135 | $whereClause = 'rev_actor = :actorId'; |
||
136 | if ($user->isIpRange()) { |
||
137 | $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id"; |
||
138 | $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp'; |
||
139 | [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername()); |
||
140 | } |
||
141 | |||
142 | // Build SQL for detecting AutoEdits via regex and/or tags. |
||
143 | $condTools = []; |
||
144 | if ('' != $regex) { |
||
145 | $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id"; |
||
146 | $condTools[] = "comment_text REGEXP :tools"; |
||
147 | $params['tools'] = $regex; |
||
148 | } |
||
149 | if ('' != $tagIds) { |
||
150 | $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id"; |
||
151 | $condTools[] = "ct_tag_id IN ($tagIds)"; |
||
152 | } |
||
153 | $condTool = 'AND (' . implode(' OR ', $condTools) . ')'; |
||
154 | |||
155 | $sql = "SELECT COUNT(DISTINCT(rev_id)) |
||
156 | FROM $revisionTable |
||
157 | $ipcJoin |
||
158 | $pageJoin |
||
159 | $commentJoin |
||
160 | $tagJoin |
||
161 | WHERE $whereClause |
||
162 | $condNamespace |
||
163 | $condTool |
||
164 | $revDateConditions"; |
||
165 | |||
166 | $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params); |
||
167 | $result = (int)$resultQuery->fetchOne(); |
||
168 | |||
169 | // Cache and return. |
||
170 | return $this->setCache($cacheKey, $result); |
||
171 | } |
||
172 | |||
173 | /** |
||
174 | * Get non-automated contributions for the given user. |
||
175 | * @param Project $project |
||
176 | * @param User $user |
||
177 | * @param string|int $namespace Namespace ID or 'all'. |
||
178 | * @param int|false $start Start date as Unix timestamp. |
||
179 | * @param int|false $end End date as Unix timestamp. |
||
180 | * @param int|false $offset Unix timestamp. Used for pagination. |
||
181 | * @param int $limit Number of results to return. |
||
182 | * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor', |
||
183 | * 'length', 'length_change', 'comment'. |
||
184 | */ |
||
185 | public function getNonAutomatedEdits( |
||
253 | } |
||
254 | |||
255 | /** |
||
256 | * Get (semi-)automated contributions for the given user, and optionally for a given tool. |
||
257 | * @param Project $project |
||
258 | * @param User $user |
||
259 | * @param string|int $namespace Namespace ID or 'all'. |
||
260 | * @param int|false $start Start date as Unix timestamp. |
||
261 | * @param int|false $end End date as Unix timestamp. |
||
262 | * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config. |
||
263 | * @param int|false $offset Unix timestamp. Used for pagination. |
||
264 | * @param int $limit Number of results to return. |
||
265 | * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor', |
||
266 | * 'length', 'length_change', 'comment'. |
||
267 | */ |
||
268 | public function getAutomatedEdits( |
||
269 | Project $project, |
||
270 | User $user, |
||
271 | $namespace = 'all', |
||
272 | $start = false, |
||
273 | $end = false, |
||
274 | ?string $tool = null, |
||
275 | $offset = false, |
||
276 | int $limit = 50 |
||
277 | ): array { |
||
278 | $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits'); |
||
279 | if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) { |
||
280 | return $this->cache->getItem($cacheKey)->get(); |
||
281 | } |
||
282 | |||
283 | $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.'); |
||
284 | |||
285 | // In this case there is a slight performance improvement we can make if we're not given a start date. |
||
286 | if ('' === $revDateConditions) { |
||
287 | $revDateConditions = 'AND revs.rev_timestamp > 0'; |
||
288 | } |
||
289 | |||
290 | // Get the combined regex and tags for the tools |
||
291 | [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool); |
||
292 | |||
293 | $pageTable = $project->getTableName('page'); |
||
294 | $revisionTable = $project->getTableName('revision'); |
||
295 | $ipcTable = $project->getTableName('ip_changes'); |
||
296 | $commentTable = $project->getTableName('comment', 'revision'); |
||
297 | $tagTable = $project->getTableName('change_tag'); |
||
298 | $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace'; |
||
299 | $tagJoin = ''; |
||
300 | $condsTool = []; |
||
301 | |||
302 | if ('' != $regex) { |
||
303 | $condsTool[] = 'comment_text RLIKE :tools'; |
||
304 | } |
||
305 | |||
306 | if ('' != $tagIds) { |
||
307 | $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)"; |
||
308 | $condsTool[] = "ct_tag_id IN ($tagIds)"; |
||
309 | } |
||
310 | |||
311 | // IP range handling. |
||
312 | $ipcJoin = ''; |
||
313 | $whereClause = 'revs.rev_actor = :actorId'; |
||
314 | $params = ['tools' => $regex]; |
||
315 | if ($user->isIpRange()) { |
||
316 | $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id"; |
||
317 | $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp'; |
||
318 | [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername()); |
||
319 | } |
||
320 | |||
321 | $sql = "SELECT |
||
322 | page_title, |
||
323 | page_namespace, |
||
324 | revs.rev_id AS rev_id, |
||
325 | revs.rev_timestamp AS timestamp, |
||
326 | revs.rev_minor_edit AS minor, |
||
327 | revs.rev_len AS length, |
||
328 | (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change, |
||
329 | comment_text AS comment |
||
330 | FROM $pageTable |
||
331 | JOIN $revisionTable AS revs ON (page_id = revs.rev_page) |
||
332 | $ipcJoin |
||
333 | LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id) |
||
334 | LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id) |
||
335 | $tagJoin |
||
336 | WHERE $whereClause |
||
337 | $revDateConditions |
||
338 | $condNamespace |
||
339 | AND (".implode(' OR ', $condsTool).") |
||
340 | GROUP BY revs.rev_id |
||
341 | ORDER BY revs.rev_timestamp DESC |
||
342 | LIMIT $limit"; |
||
343 | |||
344 | $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params); |
||
345 | $result = $resultQuery->fetchAllAssociative(); |
||
346 | |||
347 | // Cache and return. |
||
348 | return $this->setCache($cacheKey, $result); |
||
349 | } |
||
350 | |||
351 | /** |
||
352 | * Get counts of known automated tools used by the given user. |
||
353 | * @param Project $project |
||
354 | * @param User $user |
||
355 | * @param string|int $namespace Namespace ID or 'all'. |
||
356 | * @param int|false $start Start date as Unix timestamp. |
||
357 | * @param int|false $end End date as Unix timestamp. |
||
358 | * @return string[] Each tool that they used along with the count and link: |
||
359 | * [ |
||
360 | * 'Twinkle' => [ |
||
361 | * 'count' => 50, |
||
362 | * 'link' => 'Wikipedia:Twinkle', |
||
363 | * ], |
||
364 | * ] |
||
365 | */ |
||
366 | public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = false, $end = false): array |
||
367 | { |
||
368 | $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts'); |
||
369 | if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) { |
||
370 | return $this->cache->getItem($cacheKey)->get(); |
||
371 | } |
||
372 | |||
373 | $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end); |
||
374 | $params = []; |
||
375 | if ($user->isIpRange()) { |
||
376 | [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername()); |
||
377 | } |
||
378 | $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params); |
||
379 | |||
380 | $tools = $this->getTools($project, $namespace); |
||
381 | |||
382 | // handling results |
||
383 | $results = []; |
||
384 | |||
385 | while ($row = $resultQuery->fetchAssociative()) { |
||
386 | // Only track tools that they've used at least once |
||
387 | $tool = $row['toolname']; |
||
388 | if ($row['count'] > 0) { |
||
389 | $results[$tool] = [ |
||
390 | 'link' => $tools[$tool]['link'], |
||
391 | 'label' => $tools[$tool]['label'] ?? $tool, |
||
392 | 'count' => $row['count'], |
||
393 | ]; |
||
394 | } |
||
395 | } |
||
396 | |||
397 | // Sort the array by count |
||
398 | uasort($results, function ($a, $b) { |
||
399 | return $b['count'] - $a['count']; |
||
400 | }); |
||
401 | |||
402 | // Cache and return. |
||
403 | return $this->setCache($cacheKey, $results); |
||
404 | } |
||
405 | |||
406 | /** |
||
407 | * Get SQL for getting counts of known automated tools used by the user. |
||
408 | * @see self::getAutomatedCounts() |
||
409 | * @param Project $project |
||
410 | * @param User $user |
||
411 | * @param string|int $namespace Namespace ID or 'all'. |
||
412 | * @param int|false $start Start date as Unix timestamp. |
||
413 | * @param int|false $end End date as Unix timestamp. |
||
414 | * @return string The SQL. |
||
415 | */ |
||
416 | private function getAutomatedCountsSql( |
||
417 | Project $project, |
||
418 | User $user, |
||
419 | $namespace, |
||
420 | $start = false, |
||
421 | $end = false |
||
422 | ): string { |
||
423 | $revDateConditions = $this->getDateConditions($start, $end); |
||
424 | |||
425 | // Load the semi-automated edit types. |
||
426 | $tools = $this->getTools($project, $namespace); |
||
427 | |||
428 | // Create a collection of queries that we're going to run. |
||
429 | $queries = []; |
||
430 | |||
431 | $revisionTable = $project->getTableName('revision'); |
||
432 | $ipcTable = $project->getTableName('ip_changes'); |
||
433 | [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace); |
||
434 | $conn = $this->getProjectsConnection($project); |
||
435 | |||
436 | // IP range handling. |
||
437 | $ipcJoin = ''; |
||
438 | $whereClause = 'rev_actor = :actorId'; |
||
439 | if ($user->isIpRange()) { |
||
440 | $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id"; |
||
441 | $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp'; |
||
442 | } |
||
443 | |||
444 | foreach ($tools as $toolName => $values) { |
||
445 | [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values); |
||
446 | |||
447 | $toolName = $conn->quote($toolName, PDO::PARAM_STR); |
||
448 | |||
449 | // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global |
||
450 | // configuration, but no local tag exists on the said project. |
||
451 | if ('' === $condTool) { |
||
452 | continue; |
||
453 | } |
||
454 | |||
455 | $queries[] .= " |
||
456 | SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count |
||
457 | FROM $revisionTable |
||
458 | $ipcJoin |
||
459 | $pageJoin |
||
460 | $commentJoin |
||
461 | $tagJoin |
||
462 | WHERE $whereClause |
||
463 | AND $condTool |
||
464 | $condNamespace |
||
465 | $revDateConditions"; |
||
466 | } |
||
467 | |||
468 | // Combine to one big query. |
||
469 | return implode(' UNION ', $queries); |
||
470 | } |
||
471 | |||
472 | /** |
||
473 | * Get some of the inner SQL for self::getAutomatedCountsSql(). |
||
474 | * @param Project $project |
||
475 | * @param string $toolName |
||
476 | * @param string[] $values Values as defined in the AutoEdits config. |
||
477 | * @return string[] [Equality clause, JOIN clause] |
||
478 | */ |
||
479 | private function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array |
||
480 | { |
||
481 | $conn = $this->getProjectsConnection($project); |
||
482 | $commentJoin = ''; |
||
483 | $tagJoin = ''; |
||
484 | $condTool = ''; |
||
485 | |||
486 | if (isset($values['regex'])) { |
||
487 | $commentTable = $project->getTableName('comment', 'revision'); |
||
488 | $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id"; |
||
489 | $regex = $conn->quote($values['regex'], PDO::PARAM_STR); |
||
490 | $condTool = "comment_text REGEXP $regex"; |
||
491 | } |
||
492 | if (isset($values['tags'])) { |
||
493 | $tagIds = $this->getTagIdsFromNames($project, $values['tags']); |
||
|
|||
494 | |||
495 | if ($tagIds) { |
||
496 | $tagTable = $project->getTableName('change_tag'); |
||
497 | $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id"; |
||
498 | $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds); |
||
499 | |||
500 | // Use tags in addition to the regex clause, if already present. |
||
501 | // Tags are more reliable but may not be present for edits made with |
||
502 | // older versions of the tool, before it started adding tags. |
||
503 | if ('' === $condTool) { |
||
504 | $condTool = $tagClause; |
||
505 | } else { |
||
506 | $condTool = "($condTool OR $tagClause)"; |
||
507 | } |
||
508 | } |
||
509 | } |
||
510 | |||
511 | return [$condTool, $commentJoin, $tagJoin]; |
||
512 | } |
||
513 | |||
514 | /** |
||
515 | * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query. |
||
516 | * @param Project $project |
||
517 | * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag. |
||
518 | * @param string|null $tool |
||
519 | * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces. |
||
520 | * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a |
||
521 | * comma-separated list of the tag IDs, ready to be used in SQL. |
||
522 | */ |
||
523 | private function getToolRegexAndTags( |
||
560 | ]; |
||
561 | } |
||
562 | |||
563 | /** |
||
564 | * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above. |
||
565 | * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query. |
||
566 | * @param Project $project |
||
567 | * @return int[] Keys are the tag name, values are the IDs. |
||
568 | */ |
||
569 | public function getTags(Project $project): array |
||
605 | } |
||
606 | |||
607 | /** |
||
608 | * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option). |
||
609 | * For instance, Huggle edits are also tagged as Rollback, but when viewing |
||
610 | * Rollback edits we don't want to show Huggle edits. |
||
611 | * @param Project $project |
||
612 | * @param string $tool |
||
613 | * @param array $tagIds |
||
614 | * @return string |
||
615 | */ |
||
616 | private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string |
||
617 | { |
||
618 | $tagsList = implode(',', $tagIds); |
||
619 | $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? []; |
||
620 | $excludesSql = ''; |
||
621 | |||
622 | if ($tagExcludes && 1 === count($tagIds)) { |
||
623 | // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool). |
||
624 | $excludesList = implode(',', array_filter(array_map(function ($tagName) use ($project) { |
||
625 | return $this->getTags($project)[$tagName] ?? null; |
||
626 | }, $tagExcludes))); |
||
627 | |||
628 | if (strlen($excludesList)) { |
||
629 | $excludesSql = "AND ct_tag_id NOT IN ($excludesList)"; |
||
630 | } |
||
631 | } |
||
632 | |||
633 | return "ct_tag_id IN ($tagsList) $excludesSql"; |
||
634 | } |
||
635 | |||
636 | /** |
||
637 | * Get IDs for tags given the names. |
||
638 | * @param Project $project |
||
639 | * @param array $tagNames |
||
640 | * @return array |
||
641 | */ |
||
642 | private function getTagIdsFromNames(Project $project, array $tagNames): array |
||
654 | } |
||
655 | } |
||
656 |