Total Complexity | 68 |
Total Lines | 634 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like AutoEditsRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AutoEditsRepository, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | class AutoEditsRepository extends UserRepository |
||
18 | { |
||
19 | /** @var array List of automated tools, used for fetching the tool list and filtering it. */ |
||
20 | private array $aeTools; |
||
21 | |||
22 | /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */ |
||
23 | private bool $useSandbox = false; |
||
24 | |||
25 | /** @var array Process cache for tags/IDs. */ |
||
26 | private array $tags; |
||
27 | |||
28 | /** |
||
29 | * @param bool $useSandbox |
||
30 | * @return AutoEditsRepository |
||
31 | */ |
||
32 | public function setUseSandbox(bool $useSandbox): AutoEditsRepository |
||
36 | } |
||
37 | |||
/**
 * Fetch the list of semi-automated tools via the AutomatedEditsHelper, optionally narrowed to one namespace.
 *
 * The full list is loaded once per repository instance and kept in $this->aeTools.
 * When a namespace is given, a tool is kept if any of the following holds:
 *   - it declares no 'namespaces' restriction;
 *   - the namespace ID is listed in its 'namespaces';
 *   - the namespace ID is odd and the tool defines 'talk_namespaces'.
 *
 * @param Project $project
 * @param int|string $namespace Namespace ID or 'all'.
 * @return array Tool definitions, keyed as provided by the helper.
 */
public function getTools(Project $project, $namespace = 'all'): array
{
    // Lazy-load: the helper is only consulted the first time through.
    if (!isset($this->aeTools)) {
        $helper = $this->container->get('app.automated_edits_helper');
        $this->aeTools = $helper->getTools($project, $this->useSandbox);
    }

    // No namespace restriction requested; hand back everything.
    if ('all' === $namespace) {
        return $this->aeTools;
    }

    // Limit by namespace.
    return array_filter($this->aeTools, function (array $tool) use ($namespace) {
        if (empty($tool['namespaces'])) {
            return true;
        }

        if (in_array((int)$namespace, $tool['namespaces'])) {
            return true;
        }

        return 1 === $namespace % 2 && isset($tool['talk_namespaces']);
    });
}
||
66 | |||
67 | /** |
||
68 | * Get tools that were misconfigured, also removing them from $this->aeTools. |
||
69 | * @param Project $project |
||
70 | * @return string[] Labels for the invalid tools. |
||
71 | */ |
||
72 | public function getInvalidTools(Project $project): array |
||
73 | { |
||
74 | $tools = $this->getTools($project); |
||
75 | $invalidTools = $tools['invalid'] ?? []; |
||
76 | unset($this->aeTools['invalid']); |
||
77 | return $invalidTools; |
||
78 | } |
||
79 | |||
80 | /** |
||
81 | * Overrides Repository::setCache(), and will not call the parent (which sets the cache) if using the sandbox. |
||
82 | * @inheritDoc |
||
83 | */ |
||
84 | public function setCache(string $cacheKey, $value, $duration = 'PT20M') |
||
85 | { |
||
86 | if ($this->useSandbox) { |
||
87 | return $value; |
||
88 | } |
||
89 | |||
90 | return parent::setCache($cacheKey, $value, $duration); |
||
91 | } |
||
92 | |||
93 | /** |
||
94 | * Get the number of edits this user made using semi-automated tools. |
||
95 | * @param Project $project |
||
96 | * @param User $user |
||
97 | * @param string|int $namespace Namespace ID or 'all' |
||
98 | * @param int|false $start Start date as Unix timestamp. |
||
99 | * @param int|false $end End date as Unix timestamp. |
||
100 | * @return int Result of query, see below. |
||
101 | */ |
||
102 | public function countAutomatedEdits( |
||
103 | Project $project, |
||
104 | User $user, |
||
105 | $namespace = 'all', |
||
106 | $start = false, |
||
107 | $end = false |
||
108 | ): int { |
||
109 | $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount'); |
||
110 | if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) { |
||
111 | return $this->cache->getItem($cacheKey)->get(); |
||
112 | } |
||
113 | |||
114 | $revDateConditions = $this->getDateConditions($start, $end); |
||
115 | |||
116 | // Get the combined regex and tags for the tools |
||
117 | [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace); |
||
118 | |||
119 | [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace); |
||
120 | |||
121 | $revisionTable = $project->getTableName('revision'); |
||
122 | $ipcTable = $project->getTableName('ip_changes'); |
||
123 | $commentTable = $project->getTableName('comment', 'revision'); |
||
124 | $tagTable = $project->getTableName('change_tag'); |
||
125 | $commentJoin = ''; |
||
126 | $tagJoin = ''; |
||
127 | |||
128 | $params = []; |
||
129 | |||
130 | // IP range handling. |
||
131 | $ipcJoin = ''; |
||
132 | $whereClause = 'rev_actor = :actorId'; |
||
133 | if ($user->isIpRange()) { |
||
134 | $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id"; |
||
135 | $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp'; |
||
136 | [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername()); |
||
137 | } |
||
138 | |||
139 | // Build SQL for detecting AutoEdits via regex and/or tags. |
||
140 | $condTools = []; |
||
141 | if ('' != $regex) { |
||
142 | $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id"; |
||
143 | $condTools[] = "comment_text REGEXP :tools"; |
||
144 | $params['tools'] = $regex; |
||
145 | } |
||
146 | if ('' != $tagIds) { |
||
147 | $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id"; |
||
148 | $condTools[] = "ct_tag_id IN ($tagIds)"; |
||
149 | } |
||
150 | $condTool = 'AND (' . implode(' OR ', $condTools) . ')'; |
||
151 | |||
152 | $sql = "SELECT COUNT(DISTINCT(rev_id)) |
||
153 | FROM $revisionTable |
||
154 | $ipcJoin |
||
155 | $pageJoin |
||
156 | $commentJoin |
||
157 | $tagJoin |
||
158 | WHERE $whereClause |
||
159 | $condNamespace |
||
160 | $condTool |
||
161 | $revDateConditions"; |
||
162 | |||
163 | $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params); |
||
164 | $result = (int)$resultQuery->fetchOne(); |
||
165 | |||
166 | // Cache and return. |
||
167 | return $this->setCache($cacheKey, $result); |
||
168 | } |
||
169 | |||
170 | /** |
||
171 | * Get non-automated contributions for the given user. |
||
172 | * @param Project $project |
||
173 | * @param User $user |
||
174 | * @param string|int $namespace Namespace ID or 'all'. |
||
175 | * @param int|false $start Start date as Unix timestamp. |
||
176 | * @param int|false $end End date as Unix timestamp. |
||
177 | * @param int|false $offset Unix timestamp. Used for pagination. |
||
178 | * @param int $limit Number of results to return. |
||
179 | * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor', |
||
180 | * 'length', 'length_change', 'comment'. |
||
181 | */ |
||
182 | public function getNonAutomatedEdits( |
||
250 | } |
||
251 | |||
/**
 * Get (semi-)automated contributions for the given user, and optionally for a given tool.
 *
 * A revision is included when its comment matches the tool regex and/or it carries one of the
 * tool tag IDs. If neither a regex nor tag IDs are available, no revision can match and an
 * empty list is returned without running a query.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config.
 * @param int|false $offset Unix timestamp. Used for pagination.
 * @param int $limit Number of results to return.
 * @return array[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
 *   'length', 'length_change', 'comment'.
 */
public function getAutomatedEdits(
    Project $project,
    User $user,
    $namespace = 'all',
    $start = false,
    $end = false,
    ?string $tool = null,
    $offset = false,
    int $limit = 50
): array {
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.');

    // In this case there is a slight performance improvement we can make if we're not given a start date.
    if ('' === $revDateConditions) {
        $revDateConditions = 'AND revs.rev_timestamp > 0';
    }

    // Get the combined regex and tags for the tools.
    [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);

    // Loose comparison on purpose: treats both '' and null as "nothing to match".
    $hasRegex = '' != $regex;
    $hasTags = '' != $tagIds;

    // Without any tool condition the query would contain "AND ()", which is invalid SQL.
    // Nothing can match, so return an empty result set.
    if (!$hasRegex && !$hasTags) {
        return $this->setCache($cacheKey, []);
    }

    $pageTable = $project->getTableName('page');
    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    $commentTable = $project->getTableName('comment', 'revision');
    $tagTable = $project->getTableName('change_tag');
    $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
    $tagJoin = '';
    $condsTool = [];
    $params = [];

    if ($hasRegex) {
        $condsTool[] = 'comment_text RLIKE :tools';
        // Only bind :tools when the placeholder is actually part of the statement,
        // consistent with countAutomatedEdits().
        $params['tools'] = $regex;
    }

    if ($hasTags) {
        $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
        $condsTool[] = "ct_tag_id IN ($tagIds)";
    }

    // IP range handling.
    $ipcJoin = '';
    $whereClause = 'revs.rev_actor = :actorId';
    if ($user->isIpRange()) {
        // Qualify rev_id: the revision table is joined twice (revs and parentrevs) in this query.
        $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }

    $condTool = 'AND (' . implode(' OR ', $condsTool) . ')';

    $sql = "SELECT
                page_title,
                page_namespace,
                revs.rev_id AS rev_id,
                revs.rev_timestamp AS timestamp,
                revs.rev_minor_edit AS minor,
                revs.rev_len AS length,
                (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                comment_text AS comment
            FROM $pageTable
            JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
            $ipcJoin
            LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
            LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
            $tagJoin
            WHERE $whereClause
            $revDateConditions
            $condNamespace
            $condTool
            GROUP BY revs.rev_id
            ORDER BY revs.rev_timestamp DESC
            LIMIT $limit";

    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
    $result = $resultQuery->fetchAllAssociative();

    // Cache and return.
    return $this->setCache($cacheKey, $result);
}
||
347 | |||
/**
 * Get counts of known automated tools used by the given user.
 *
 * Only tools with a count greater than zero are included; the result is sorted by count, descending.
 * If no per-tool query could be built (no tool has a usable regex or tag), an empty array is
 * returned without touching the database.
 *
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return array[] Each tool that they used along with the link, label and count:
 *                  [
 *                      'Twinkle' => [
 *                          'link' => 'Wikipedia:Twinkle',
 *                          'label' => 'Twinkle',
 *                          'count' => 50,
 *                      ],
 *                  ]
 */
public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = false, $end = false): array
{
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end);

    // getAutomatedCountsSql() yields an empty string when every tool was skipped;
    // executing that would be an error, and there is nothing to count anyway.
    if ('' === $sql) {
        return $this->setCache($cacheKey, []);
    }

    $params = [];
    if ($user->isIpRange()) {
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }
    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);

    $tools = $this->getTools($project, $namespace);

    // Collect one entry per tool that was used at least once.
    $results = [];

    while ($row = $resultQuery->fetchAssociative()) {
        $tool = $row['toolname'];
        if ($row['count'] > 0) {
            $results[$tool] = [
                'link' => $tools[$tool]['link'],
                // Fall back to the tool's config key when no explicit label is configured.
                'label' => $tools[$tool]['label'] ?? $tool,
                'count' => $row['count'],
            ];
        }
    }

    // Sort by count, highest first, keeping the tool names as keys.
    uasort($results, function ($a, $b) {
        return $b['count'] <=> $a['count'];
    });

    // Cache and return.
    return $this->setCache($cacheKey, $results);
}
||
402 | |||
/**
 * Get SQL for getting counts of known automated tools used by the user.
 *
 * Builds one "SELECT <tool> AS toolname, COUNT(...) AS count" query per tool and combines them
 * with UNION. Tools for which no condition could be built are left out. The SQL uses the
 * named placeholders :actorId, or :startIp / :endIp for IP ranges; binding them is left to the caller.
 *
 * @see self::getToolCounts()
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return string The SQL, or an empty string if no tool produced a query.
 */
private function getAutomatedCountsSql(
    Project $project,
    User $user,
    $namespace,
    $start = false,
    $end = false
): string {
    $revDateConditions = $this->getDateConditions($start, $end);

    // Load the semi-automated edit types.
    $tools = $this->getTools($project, $namespace);

    // Create a collection of queries that we're going to run.
    $queries = [];

    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
    $conn = $this->getProjectsConnection($project);

    // IP range handling.
    $ipcJoin = '';
    $whereClause = 'rev_actor = :actorId';
    if ($user->isIpRange()) {
        $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
    }

    foreach ($tools as $toolName => $values) {
        [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);

        // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
        // configuration, but no local tag exists on the said project.
        if ('' === $condTool) {
            continue;
        }

        // The tool name is emitted as a string literal in the SELECT list, so it must be quoted.
        $quotedToolName = $conn->quote($toolName, PDO::PARAM_STR);

        $queries[] = "
            SELECT $quotedToolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
            FROM $revisionTable
            $ipcJoin
            $pageJoin
            $commentJoin
            $tagJoin
            WHERE $whereClause
            AND $condTool
            $condNamespace
            $revDateConditions";
    }

    // Combine to one big query.
    return implode(' UNION ', $queries);
}
||
468 | |||
/**
 * Get some of the inner SQL for self::getAutomatedCountsSql().
 *
 * @param Project $project
 * @param string $toolName
 * @param string[] $values Values as defined in the AutoEdits config.
 * @return string[] [Tool condition, comment JOIN clause, tag JOIN clause]. Any of them may be an
 *   empty string; an empty tool condition means the tool has neither a regex nor any resolvable tag.
 */
private function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
{
    $conn = $this->getProjectsConnection($project);

    // Regex part: match against the revision's comment text.
    $regexClause = '';
    $commentJoin = '';
    if (isset($values['regex'])) {
        $commentTable = $project->getTableName('comment', 'revision');
        $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
        $quotedRegex = $conn->quote($values['regex'], PDO::PARAM_STR);
        $regexClause = "comment_text REGEXP $quotedRegex";
    }

    // Tag part: resolve the configured tag names to IDs, if the tool has any.
    $tagIds = isset($values['tags'])
        ? $this->getTagIdsFromNames($project, $values['tags'])
        : [];

    if (!$tagIds) {
        // Regex only (or nothing at all); no tag join is needed.
        return [$regexClause, $commentJoin, ''];
    }

    $tagTable = $project->getTableName('change_tag');
    $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
    $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds);

    // Use tags in addition to the regex clause, if already present.
    // Tags are more reliable but may not be present for edits made with
    // older versions of the tool, before it started adding tags.
    $condTool = '' === $regexClause
        ? $tagClause
        : "($regexClause OR $tagClause)";

    return [$condTool, $commentJoin, $tagJoin];
}
||
510 | |||
511 | /** |
||
512 | * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query. |
||
513 | * @param Project $project |
||
514 | * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag. |
||
515 | * @param string|null $tool |
||
516 | * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces. |
||
517 | * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a |
||
518 | * comma-separated list of the tag IDs, ready to be used in SQL. |
||
519 | */ |
||
520 | private function getToolRegexAndTags( |
||
557 | ]; |
||
558 | } |
||
559 | |||
560 | /** |
||
561 | * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above. |
||
562 | * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query. |
||
563 | * @param Project $project |
||
564 | * @return int[] Keys are the tag name, values are the IDs. |
||
565 | */ |
||
566 | public function getTags(Project $project): array |
||
602 | } |
||
603 | |||
/**
 * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
 * For instance, Huggle edits are also tagged as Rollback, but when viewing
 * Rollback edits we don't want to show Huggle edits.
 *
 * Exclusions are only applied when exactly one tag ID was given and the tool defines 'tag_excludes'.
 *
 * @param Project $project
 * @param string $tool Key of the tool in the AutoEdits config.
 * @param array $tagIds
 * @return string "ct_tag_id IN (...)" followed by a space and, where applicable, "AND ct_tag_id NOT IN (...)".
 */
private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
{
    $includeSql = 'ct_tag_id IN (' . implode(',', $tagIds) . ')';
    $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];

    $applyExclusions = $tagExcludes && 1 === count($tagIds);
    if (!$applyExclusions) {
        // The trailing space is part of the clause as it has always been emitted.
        return $includeSql . ' ';
    }

    // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
    $excludeIds = array_filter(array_map(
        fn ($tagName) => $this->getTags($project)[$tagName] ?? null,
        $tagExcludes
    ));
    $excludesList = implode(',', $excludeIds);

    $excludesSql = '' !== $excludesList
        ? "AND ct_tag_id NOT IN ($excludesList)"
        : '';

    return "$includeSql $excludesSql";
}
||
632 | |||
633 | /** |
||
634 | * Get IDs for tags given the names. |
||
635 | * @param Project $project |
||
636 | * @param array $tagNames |
||
637 | * @return array |
||
638 | */ |
||
639 | private function getTagIdsFromNames(Project $project, array $tagNames): array |
||
651 | } |
||
652 | } |
||
653 |