Total Complexity | 68 |
Total Lines | 636 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like AutoEditsRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AutoEditsRepository, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
20 | class AutoEditsRepository extends UserRepository |
||
21 | { |
||
22 | /** @var array List of automated tools, used for fetching the tool list and filtering it. */ |
||
23 | private $aeTools; |
||
24 | |||
25 | /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */ |
||
26 | private $useSandbox; |
||
27 | |||
28 | /** @var array Process cache for tags/IDs. */ |
||
29 | private $tags; |
||
30 | |||
/**
 * Build the repository and remember whether the sandbox configuration is in use.
 * The flag is consulted by the caching and tool-loading methods of this class.
 * @param bool $useSandbox True to use the /sandbox version of the config (set from AutomatedEditsController).
 */
public function __construct(bool $useSandbox = false)
{
    // The parent is initialized first, exactly as before; only then is the flag stored.
    parent::__construct();

    $this->useSandbox = $useSandbox;
}
||
40 | |||
/**
 * Fetch the list of semi-automated tools from the AutomatedEditsHelper, optionally limited to one namespace.
 * The helper is queried only once per instance; the result is kept in $this->aeTools.
 * @param Project $project
 * @param int|string $namespace Namespace ID or 'all'.
 * @return array The tools, keyed as returned by the helper. When filtered, original keys are preserved.
 */
public function getTools(Project $project, $namespace = 'all'): array
{
    if (!isset($this->aeTools)) {
        $helper = $this->container->get('app.automated_edits_helper');
        $this->aeTools = $helper->getTools($project, $this->useSandbox);
    }

    // No namespace restriction requested: hand back the full list.
    if ('all' === $namespace) {
        return $this->aeTools;
    }

    // Limit by namespace.
    return array_filter($this->aeTools, function (array $tool) use ($namespace) {
        // Tools without a namespace restriction apply everywhere.
        if (empty($tool['namespaces'])) {
            return true;
        }

        // The tool explicitly lists the requested namespace.
        if (in_array((int)$namespace, $tool['namespaces'])) {
            return true;
        }

        // Otherwise keep the tool only for odd namespace IDs when it defines 'talk_namespaces'.
        return 1 === $namespace % 2 && isset($tool['talk_namespaces']);
    });
}
||
69 | |||
/**
 * Return the misconfigured tools and drop the 'invalid' entry from $this->aeTools.
 * @param Project $project
 * @return string[] Labels for the invalid tools, or an empty array if there are none.
 */
public function getInvalidTools(Project $project): array
{
    // getTools() makes sure $this->aeTools has been loaded before we read from it.
    $invalid = $this->getTools($project)['invalid'] ?? [];

    // Remove the bookkeeping entry so later consumers only see real tools.
    unset($this->aeTools['invalid']);

    return $invalid;
}
||
82 | |||
/**
 * Overrides Repository::setCache(). In sandbox mode the parent is not called (so nothing is cached)
 * and the value is simply handed back; otherwise the call is delegated to the parent.
 * @inheritDoc
 */
public function setCache(string $cacheKey, $value, $duration = 'PT20M')
{
    return $this->useSandbox
        ? $value
        : parent::setCache($cacheKey, $value, $duration);
}
||
95 | |||
/**
 * Get the number of edits this user made using semi-automated tools.
 * An edit is counted when its comment matches the combined tool regex and/or it carries one of the tool tags.
 * The result is stored through setCache(), and the cache is bypassed when the sandbox is in use.
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return int Number of distinct revisions matched; 0 when no regex and no tags are available to match on.
 */
public function countAutomatedEdits(
    Project $project,
    User $user,
    $namespace = 'all',
    $start = false,
    $end = false
): int {
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $revDateConditions = $this->getDateConditions($start, $end);

    // Get the combined regex and tags for the tools.
    [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);

    [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);

    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    $commentTable = $project->getTableName('comment', 'revision');
    $tagTable = $project->getTableName('change_tag');
    $commentJoin = '';
    $tagJoin = '';

    $params = [];

    // IP range handling.
    $ipcJoin = '';
    $whereClause = 'rev_actor = :actorId';
    if ($user->isIpRange()) {
        $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }

    // Build SQL for detecting AutoEdits via regex and/or tags.
    $condTools = [];
    if ('' != $regex) {
        $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
        $condTools[] = "comment_text REGEXP :tools";
        $params['tools'] = $regex;
    }
    if ('' != $tagIds) {
        $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
        $condTools[] = "ct_tag_id IN ($tagIds)";
    }

    // With neither a regex nor tags the condition would render as "AND ()", which is invalid SQL.
    // Nothing can be identified as automated in that case, so the count is zero.
    if ([] === $condTools) {
        return $this->setCache($cacheKey, 0);
    }

    $condTool = 'AND (' . implode(' OR ', $condTools) . ')';

    $sql = "SELECT COUNT(DISTINCT(rev_id))
            FROM $revisionTable
            $ipcJoin
            $pageJoin
            $commentJoin
            $tagJoin
            WHERE $whereClause
            $condNamespace
            $condTool
            $revDateConditions";

    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
    $result = (int)$resultQuery->fetchColumn();

    // Cache and return.
    return $this->setCache($cacheKey, $result);
}
||
172 | |||
173 | /** |
||
174 | * Get non-automated contributions for the given user. |
||
175 | * @param Project $project |
||
176 | * @param User $user |
||
177 | * @param string|int $namespace Namespace ID or 'all'. |
||
178 | * @param int|false $start Start date as Unix timestamp. |
||
179 | * @param int|false $end End date as Unix timestamp. |
||
180 | * @param int|false $offset Unix timestamp. Used for pagination. |
||
181 | * @param int $limit Number of results to return. |
||
182 | * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor', |
||
183 | * 'length', 'length_change', 'comment'. |
||
184 | */ |
||
185 | public function getNonAutomatedEdits( |
||
253 | } |
||
254 | |||
/**
 * Get (semi-)automated contributions for the given user, and optionally for a given tool.
 * The result is stored through setCache(), and the cache is bypassed when the sandbox is in use.
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config.
 * @param int|false $offset Unix timestamp. Used for pagination.
 * @param int $limit Number of results to return.
 * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
 *   'length', 'length_change', 'comment'. Empty when there is no regex and no tag to match on.
 */
public function getAutomatedEdits(
    Project $project,
    User $user,
    $namespace = 'all',
    $start = false,
    $end = false,
    ?string $tool = null,
    $offset = false,
    int $limit = 50
): array {
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.');

    // In this case there is a slight performance improvement we can make if we're not given a start date.
    if ('' === $revDateConditions) {
        $revDateConditions = 'AND revs.rev_timestamp > 0';
    }

    // Get the combined regex and tags for the tools.
    [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);

    $pageTable = $project->getTableName('page');
    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    $commentTable = $project->getTableName('comment', 'revision');
    $tagTable = $project->getTableName('change_tag');
    $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
    $tagJoin = '';
    $condsTool = [];
    $params = [];

    if ('' != $regex) {
        $condsTool[] = 'comment_text RLIKE :tools';
        // Bind the placeholder only when it actually appears in the statement.
        $params['tools'] = $regex;
    }

    if ('' != $tagIds) {
        $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
        $condsTool[] = "ct_tag_id IN ($tagIds)";
    }

    // With neither a regex nor tags the condition would render as "AND ()", which is invalid SQL.
    // No edit can be attributed to a tool in that case, so there is nothing to list.
    if ([] === $condsTool) {
        return $this->setCache($cacheKey, []);
    }

    $condTool = 'AND (' . implode(' OR ', $condsTool) . ')';

    // IP range handling.
    $ipcJoin = '';
    $whereClause = 'revs.rev_actor = :actorId';
    if ($user->isIpRange()) {
        // The revision table is aliased twice in this query, so the join column is qualified explicitly.
        $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }

    $sql = "SELECT
                page_title,
                page_namespace,
                revs.rev_id AS rev_id,
                revs.rev_timestamp AS timestamp,
                revs.rev_minor_edit AS minor,
                revs.rev_len AS length,
                (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                comment_text AS comment
            FROM $pageTable
            JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
            $ipcJoin
            LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
            LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
            $tagJoin
            WHERE $whereClause
            $revDateConditions
            $condNamespace
            $condTool
            GROUP BY revs.rev_id
            ORDER BY revs.rev_timestamp DESC
            LIMIT $limit";

    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
    $result = $resultQuery->fetchAll();

    // Cache and return.
    return $this->setCache($cacheKey, $result);
}
||
350 | |||
/**
 * Get counts of known automated tools used by the given user.
 * The result is stored through setCache(), and the cache is bypassed when the sandbox is in use.
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return array[] Each tool that they used at least once, sorted by count (highest first), along with
 *   its link, label and count:
 *                  [
 *                      'Twinkle' => [
 *                          'link' => 'Wikipedia:Twinkle',
 *                          'label' => 'Twinkle',
 *                          'count' => 50,
 *                      ],
 *                  ]
 */
public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = false, $end = false): array
{
    $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
    if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
        return $this->cache->getItem($cacheKey)->get();
    }

    $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end);

    // getAutomatedCountsSql() yields an empty string when no tool produced a usable condition.
    // Executing an empty statement would fail, and there is nothing to count anyway.
    if ('' === $sql) {
        return $this->setCache($cacheKey, []);
    }

    $params = [];
    if ($user->isIpRange()) {
        [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
    }
    $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);

    $tools = $this->getTools($project, $namespace);

    // Handling results.
    $results = [];

    while ($row = $resultQuery->fetch()) {
        // Only track tools that they've used at least once.
        $tool = $row['toolname'];
        if ($row['count'] > 0) {
            $results[$tool] = [
                // A tool without a configured link yields null rather than an undefined-index notice.
                'link' => $tools[$tool]['link'] ?? null,
                'label' => $tools[$tool]['label'] ?? $tool,
                'count' => $row['count'],
            ];
        }
    }

    // Sort the array by count, highest first, keeping the tool names as keys.
    uasort($results, function ($a, $b) {
        return $b['count'] <=> $a['count'];
    });

    // Cache and return.
    return $this->setCache($cacheKey, $results);
}
||
405 | |||
/**
 * Get SQL for getting counts of known automated tools used by the user.
 * One SELECT is generated per tool and the SELECTs are combined with UNION. Each row of the result
 * carries the tool name as 'toolname' and the number of distinct revisions as 'count'.
 * @see self::getToolCounts()
 * @param Project $project
 * @param User $user
 * @param string|int $namespace Namespace ID or 'all'.
 * @param int|false $start Start date as Unix timestamp.
 * @param int|false $end End date as Unix timestamp.
 * @return string The SQL, or an empty string if no tool produced a regex or tag condition.
 */
private function getAutomatedCountsSql(
    Project $project,
    User $user,
    $namespace,
    $start = false,
    $end = false
): string {
    $revDateConditions = $this->getDateConditions($start, $end);

    // Load the semi-automated edit types.
    $tools = $this->getTools($project, $namespace);

    // Create a collection of queries that we're going to run.
    $queries = [];

    $revisionTable = $project->getTableName('revision');
    $ipcTable = $project->getTableName('ip_changes');
    [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
    $conn = $this->getProjectsConnection($project);

    // IP range handling. The :startIp/:endIp values are bound by the caller.
    $ipcJoin = '';
    $whereClause = 'rev_actor = :actorId';
    if ($user->isIpRange()) {
        $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
        $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
    }

    foreach ($tools as $toolName => $values) {
        [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);

        // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
        // configuration, but no local tag exists on the said project.
        if ('' === $condTool) {
            continue;
        }

        // The tool name is embedded as a string literal, so it must be quoted by the connection.
        $quotedName = $conn->quote($toolName, PDO::PARAM_STR);

        $queries[] = "
            SELECT $quotedName AS toolname, COUNT(DISTINCT(rev_id)) AS count
            FROM $revisionTable
            $ipcJoin
            $pageJoin
            $commentJoin
            $tagJoin
            WHERE $whereClause
            AND $condTool
            $condNamespace
            $revDateConditions";
    }

    // Combine to one big query.
    return implode(' UNION ', $queries);
}
||
471 | |||
/**
 * Build the per-tool fragments used by self::getAutomatedCountsSql().
 * @param Project $project
 * @param string $toolName
 * @param string[] $values Values as defined in the AutoEdits config ('regex' and/or 'tags' are read here).
 * @return string[] [Condition for the tool, JOIN on the comment table, JOIN on the change_tag table].
 *   Any element is an empty string when it does not apply.
 */
private function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
{
    $conn = $this->getProjectsConnection($project);
    $commentJoin = '';
    $tagJoin = '';

    // Conditions identifying the tool; at most one for the regex and one for the tags.
    $clauses = [];

    if (isset($values['regex'])) {
        $commentTable = $project->getTableName('comment', 'revision');
        $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
        $quotedRegex = $conn->quote($values['regex'], PDO::PARAM_STR);
        $clauses[] = "comment_text REGEXP $quotedRegex";
    }

    if (isset($values['tags'])) {
        $tagIds = $this->getTagIdsFromNames($project, $values['tags']);

        // Tags configured for the tool may not resolve to any ID; then no tag clause is added.
        if ($tagIds) {
            $tagTable = $project->getTableName('change_tag');
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
            $clauses[] = $this->getTagsExclusionsSql($project, $toolName, $tagIds);
        }
    }

    // Use tags in addition to the regex clause, if both are present.
    // Tags are more reliable but may not be present for edits made with
    // older versions of the tool, before it started adding tags.
    if (count($clauses) > 1) {
        $condTool = '(' . implode(' OR ', $clauses) . ')';
    } else {
        $condTool = $clauses[0] ?? '';
    }

    return [$condTool, $commentJoin, $tagJoin];
}
||
513 | |||
514 | /** |
||
515 | * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query. |
||
516 | * @param Project $project |
||
517 | * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag. |
||
518 | * @param string|null $tool |
||
519 | * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces. |
||
520 | * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a |
||
521 | * comma-separated list of the tag IDs, ready to be used in SQL. |
||
522 | */ |
||
523 | private function getToolRegexAndTags( |
||
560 | ]; |
||
561 | } |
||
562 | |||
563 | /** |
||
564 | * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above. |
||
565 | * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query. |
||
566 | * @param Project $project |
||
567 | * @return int[] Keys are the tag name, values are the IDs. |
||
568 | */ |
||
569 | public function getTags(Project $project): array |
||
607 | } |
||
608 | |||
/**
 * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
 * For instance, Huggle edits are also tagged as Rollback, but when viewing
 * Rollback edits we don't want to show Huggle edits.
 * Exclusions are only applied when exactly one tag ID is given.
 * NOTE(review): both conditions test ct_tag_id of the same joined row — confirm this really excludes
 * revisions whose excluded tag lives on a different change_tag row.
 * @param Project $project
 * @param string $tool Key of the tool in the AutoEdits config.
 * @param array $tagIds
 * @return string
 */
private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
{
    $tagsList = implode(',', $tagIds);
    $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];

    // The trailing space is where the optional exclusion clause gets appended.
    $inClause = "ct_tag_id IN ($tagsList) ";

    if (!$tagExcludes || 1 !== count($tagIds)) {
        return $inClause;
    }

    // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
    $excludedIds = [];
    foreach ($tagExcludes as $tagName) {
        $tagId = $this->getTags($project)[$tagName] ?? null;
        if ($tagId) {
            $excludedIds[] = $tagId;
        }
    }
    $excludesList = implode(',', $excludedIds);

    if ('' === $excludesList) {
        return $inClause;
    }

    return $inClause . "AND ct_tag_id NOT IN ($excludesList)";
}
||
637 | |||
638 | /** |
||
639 | * Get IDs for tags given the names. |
||
640 | * @param Project $project |
||
641 | * @param array $tagNames |
||
642 | * @return array |
||
643 | */ |
||
644 | private function getTagIdsFromNames(Project $project, array $tagNames): array |
||
656 | } |
||
657 | } |
||
658 |
This function has been deprecated, and its author has provided an explanatory message.
That message should tell you whether and when the function will be removed, and which function to use instead.