Passed
Push — master ( 244b80...613e0d )
by MusikAnimal
06:01
created

AutoEditsRepository::getNonAutomatedEdits()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 55
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 25
nc 5
nop 6
dl 0
loc 55
rs 9.52
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * This file contains only the AutoEditsRepository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Project;
11
use AppBundle\Model\User;
12
13
/**
14
 * AutoEditsRepository is responsible for retrieving data from the database
15
 * about the automated edits made by a user.
16
 * @codeCoverageIgnore
17
 */
18
class AutoEditsRepository extends UserRepository
19
{
20
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
21
    private $aeTools;
22
23
    /**
24
     * Method to give the repository access to the AutomatedEditsHelper and fetch the list of semi-automated tools.
25
     * @param Project $project
26
     * @param int|string $namespace Namespace ID or 'all'.
27
     * @return array
28
     */
29
    public function getTools(Project $project, $namespace = 'all'): array
30
    {
31
        if (!isset($this->aeTools)) {
32
            $this->aeTools = $this->container
33
                ->get('app.automated_edits_helper')
34
                ->getTools($project);
35
        }
36
37
        if ('all' !== $namespace) {
38
            // Limit by namespace.
39
            return array_filter($this->aeTools, function (array $tool) use ($namespace) {
40
                return empty($tool['namespaces']) ||
41
                    in_array((int)$namespace, $tool['namespaces']) ||
42
                    (
43
                        1 === $namespace % 2 &&
44
                        isset($tool['talk_namespaces'])
45
                    );
46
            });
47
        }
48
49
        return $this->aeTools;
50
    }
51
52
    /**
53
     * Is the tag for given tool intended to be counted by itself? For instance,
54
     * when counting Rollback edits we don't want to also count Huggle edits (which are tagged as Rollback).
55
     * @param Project $project
56
     * @param string|null $tool
57
     * @return bool
58
     */
59
    private function usesSingleTag(Project $project, ?string $tool): bool
60
    {
61
        return isset($this->getTools($project)[$tool]['single_tag']);
62
    }
63
64
    /**
65
     * Get the number of edits this user made using semi-automated tools.
66
     * @param Project $project
67
     * @param User $user
68
     * @param string|int $namespace Namespace ID or 'all'
69
     * @param string $start Start date in a format accepted by strtotime()
70
     * @param string $end End date in a format accepted by strtotime()
71
     * @return int Result of query, see below.
72
     */
73
    public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = ''): int
74
    {
75
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
76
        if ($this->cache->hasItem($cacheKey)) {
77
            return $this->cache->getItem($cacheKey)->get();
78
        }
79
80
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
81
82
        // Get the combined regex and tags for the tools
83
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
84
85
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
86
87
        $revisionTable = $project->getTableName('revision');
88
        $commentTable = $project->getTableName('comment');
89
        $tagTable = $project->getTableName('change_tag');
90
        $commentJoin = '';
91
        $tagJoin = '';
92
93
        $params = [];
94
95
        // Build SQL for detecting AutoEdits via regex and/or tags.
96
        $condTools = [];
97
        if ('' != $regex) {
98
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
99
            $condTools[] = "comment_text REGEXP :tools";
100
            $params['tools'] = $regex;
101
        }
102
        if ('' != $tagIds) {
103
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
104
            $condTools[] = "ct_tag_id IN ($tagIds)";
105
        }
106
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
107
108
        $sql = "SELECT COUNT(DISTINCT(rev_id))
109
                FROM $revisionTable
110
                $pageJoin
111
                $commentJoin
112
                $tagJoin
113
                WHERE rev_actor = :actorId
114
                $condNamespace
115
                $condTool
116
                $condBegin
117
                $condEnd";
118
119
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, $params);
120
        $result = (int)$resultQuery->fetchColumn();
121
122
        // Cache and return.
123
        return $this->setCache($cacheKey, $result);
124
    }
125
126
    /**
127
     * Get non-automated contributions for the given user.
128
     * @param Project $project
129
     * @param User $user
130
     * @param string|int $namespace Namespace ID or 'all'.
131
     * @param string $start Start date in a format accepted by strtotime().
132
     * @param string $end End date in a format accepted by strtotime().
133
     * @param int $offset Used for pagination, offset results by N edits.
134
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
135
     *   'length', 'length_change', 'comment'.
136
     */
137
    public function getNonAutomatedEdits(
138
        Project $project,
139
        User $user,
140
        $namespace = 'all',
141
        $start = '',
142
        $end = '',
143
        int $offset = 0
144
    ): array {
145
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
146
        if ($this->cache->hasItem($cacheKey)) {
147
            return $this->cache->getItem($cacheKey)->get();
148
        }
149
150
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
151
152
        // Get the combined regex and tags for the tools
153
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
154
155
        $pageTable = $project->getTableName('page');
156
        $revisionTable = $project->getTableName('revision');
157
        $commentTable = $project->getTableName('comment');
158
        $tagTable = $project->getTableName('change_tag');
159
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
160
        $condTag = '' != $tagIds ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
161
            WHERE ct_rev_id = revs.rev_id AND ct_tag_id IN ($tagIds))" : '';
162
        $sql = "SELECT
163
                    page_title,
164
                    page_namespace,
165
                    revs.rev_id AS rev_id,
166
                    revs.rev_timestamp AS timestamp,
167
                    revs.rev_minor_edit AS minor,
168
                    revs.rev_len AS length,
169
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
170
                    comment_text AS comment
171
                FROM $pageTable
172
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
173
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
174
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
175
                WHERE revs.rev_actor = :actorId
176
                AND revs.rev_timestamp > 0
177
                AND comment_text NOT RLIKE :tools
178
                $condTag
179
                $condBegin
180
                $condEnd
181
                $condNamespace
182
                GROUP BY revs.rev_id
183
                ORDER BY revs.rev_timestamp DESC
184
                LIMIT 50
185
                OFFSET $offset";
186
187
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
188
        $result = $resultQuery->fetchAll();
189
190
        // Cache and return.
191
        return $this->setCache($cacheKey, $result);
192
    }
193
194
    /**
195
     * Get (semi-)automated contributions for the given user, and optionally for a given tool.
196
     * @param Project $project
197
     * @param User $user
198
     * @param string|int $namespace Namespace ID or 'all'.
199
     * @param string $start Start date in a format accepted by strtotime().
200
     * @param string $end End date in a format accepted by strtotime().
201
     * @param string|null $tool Only get edits made with this tool. Must match the keys in semi_automated.yml.
202
     * @param int $offset Used for pagination, offset results by N edits.
203
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
204
     *   'length', 'length_change', 'comment'.
205
     */
206
    public function getAutomatedEdits(
207
        Project $project,
208
        User $user,
209
        $namespace = 'all',
210
        $start = '',
211
        $end = '',
212
        ?string $tool = null,
213
        int $offset = 0
214
    ): array {
215
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
216
        if ($this->cache->hasItem($cacheKey)) {
217
            return $this->cache->getItem($cacheKey)->get();
218
        }
219
220
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
221
222
        // In this case there is a slight performance improvement we can make if we're not given a start date.
223
        if ('' == $condBegin && '' == $condEnd) {
224
            $condBegin = 'AND revs.rev_timestamp > 0';
225
        }
226
227
        // Get the combined regex and tags for the tools
228
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);
229
230
        $pageTable = $project->getTableName('page');
231
        $revisionTable = $project->getTableName('revision');
232
        $commentTable = $project->getTableName('comment');
233
        $tagTable = $project->getTableName('change_tag');
234
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
235
        $tagJoin = '';
236
        $condsTool = [];
237
238
        if ('' != $regex) {
239
            $condsTool[] = 'comment_text RLIKE :tools';
240
        }
241
242
        if ('' != $tagIds) {
243
            $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
244
            if ($this->usesSingleTag($project, $tool)) {
245
                // Only show edits made with the tool that don't overlap with other tools.
246
                // For instance, Huggle edits are also tagged as Rollback, but when viewing
247
                // Rollback edits we don't want to show Huggle edits.
248
                $condsTool[] = "
249
                    EXISTS (
250
                        SELECT COUNT(ct_tag_id) AS tag_count
251
                        FROM $tagTable
252
                        WHERE ct_rev_id = revs.rev_id
253
                        HAVING tag_count = 1 AND ct_tag_id = $tagIds
254
                    )";
255
            } else {
256
                $condsTool[] = "ct_tag_id IN ($tagIds)";
257
            }
258
        }
259
260
        $sql = "SELECT
261
                    page_title,
262
                    page_namespace,
263
                    revs.rev_id AS rev_id,
264
                    revs.rev_timestamp AS timestamp,
265
                    revs.rev_minor_edit AS minor,
266
                    revs.rev_len AS length,
267
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
268
                    comment_text AS comment
269
                FROM $pageTable
270
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
271
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
272
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
273
                $tagJoin
274
                WHERE revs.rev_actor = :actorId
275
                $condBegin
276
                $condEnd
277
                $condNamespace
278
                AND (".implode(' OR ', $condsTool).")
279
                GROUP BY revs.rev_id
280
                ORDER BY revs.rev_timestamp DESC
281
                LIMIT 50
282
                OFFSET $offset";
283
284
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
285
        $result = $resultQuery->fetchAll();
286
287
        // Cache and return.
288
        return $this->setCache($cacheKey, $result);
289
    }
290
291
    /**
292
     * Get counts of known automated tools used by the given user.
293
     * @param Project $project
294
     * @param User $user
295
     * @param string|int $namespace Namespace ID or 'all'.
296
     * @param string $start Start date in a format accepted by strtotime().
297
     * @param string $end End date in a format accepted by strtotime().
298
     * @return string[] Each tool that they used along with the count and link:
299
     *                  [
300
     *                      'Twinkle' => [
301
     *                          'count' => 50,
302
     *                          'link' => 'Wikipedia:Twinkle',
303
     *                      ],
304
     *                  ]
305
     */
306
    public function getToolCounts(
307
        Project $project,
308
        User $user,
309
        $namespace = 'all',
310
        $start = '',
311
        $end = ''
312
    ): array {
313
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
314
        if ($this->cache->hasItem($cacheKey)) {
315
            return $this->cache->getItem($cacheKey)->get();
316
        }
317
318
        $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end);
319
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end);
320
321
        $tools = $this->getTools($project, $namespace);
322
323
        // handling results
324
        $results = [];
325
326
        while ($row = $resultQuery->fetch()) {
327
            // Only track tools that they've used at least once
328
            $tool = $row['toolname'];
329
            if ($row['count'] > 0) {
330
                $results[$tool] = [
331
                    'link' => $tools[$tool]['link'],
332
                    'label' => $tools[$tool]['label'] ?? $tool,
333
                    'count' => $row['count'],
334
                ];
335
            }
336
        }
337
338
        // Sort the array by count
339
        uasort($results, function ($a, $b) {
340
            return $b['count'] - $a['count'];
341
        });
342
343
        // Cache and return.
344
        return $this->setCache($cacheKey, $results);
345
    }
346
347
    /**
348
     * Get SQL for getting counts of known automated tools used by the given user.
349
     * @see self::getAutomatedCounts()
350
     * @param Project $project
351
     * @param User $user
352
     * @param string|int $namespace Namespace ID or 'all'.
353
     * @param string $start Start date in a format accepted by strtotime()
354
     * @param string $end End date in a format accepted by strtotime()
355
     * @return string The SQL.
356
     */
357
    private function getAutomatedCountsSql(Project $project, User $user, $namespace, $start, $end): string
0 ignored issues
show
Unused Code introduced by
The parameter $user is not used and could be removed. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-unused  annotation

357
    private function getAutomatedCountsSql(Project $project, /** @scrutinizer ignore-unused */ User $user, $namespace, $start, $end): string

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
358
    {
359
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
360
361
        // Load the semi-automated edit types.
362
        $tools = $this->getTools($project, $namespace);
363
364
        // Create a collection of queries that we're going to run.
365
        $queries = [];
366
367
        $revisionTable = $project->getTableName('revision');
368
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
369
        $conn = $this->getProjectsConnection();
370
371
        foreach ($tools as $toolName => $values) {
372
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $values);
373
374
            $toolName = $conn->quote($toolName, \PDO::PARAM_STR);
375
376
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
377
            // configuration, but no local tag exists on the said project.
378
            if ('' === $condTool) {
379
                continue;
380
            }
381
382
            $queries[] .= "
383
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
384
                FROM $revisionTable
385
                $pageJoin
386
                $commentJoin
387
                $tagJoin
388
                WHERE rev_actor = :actorId
389
                AND $condTool
390
                $condNamespace
391
                $condBegin
392
                $condEnd";
393
        }
394
395
        // Combine to one big query.
396
        return implode(' UNION ', $queries);
397
    }
398
399
    /**
400
     * Get some of the inner SQL for self::getAutomatedCountsSql().
401
     * @param Project $project
402
     * @param string[] $values Values as defined in semi_automated.yml
403
     * @return string[] [Equality clause, JOIN clause]
404
     */
405
    private function getInnerAutomatedCountsSql(Project $project, array $values): array
406
    {
407
        $conn = $this->getProjectsConnection();
408
        $commentJoin = '';
409
        $tagJoin = '';
410
        $condTool = '';
411
412
        if (isset($values['regex'])) {
413
            $commentTable = $project->getTableName('comment');
414
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
415
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
416
            $condTool = "comment_text REGEXP $regex";
417
        }
418
        if (isset($values['tag']) && isset($this->getTags($project)[$values['tag']])) {
419
            $tagTable = $project->getTableName('change_tag');
420
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
421
422
            $tagId = $this->getTags($project)[$values['tag']];
423
424
            // This ensures we count only edits made with the given tool, and not other
425
            // edits that incidentally have the same tag. For instance, Huggle edits
426
            // are also tagged as Rollback, but we want to make them mutually exclusive.
427
            $tagClause = "
428
                EXISTS (
429
                    SELECT COUNT(ct_tag_id) AS tag_count
430
                    FROM $tagTable
431
                    WHERE ct_rev_id = rev_id
432
                    HAVING tag_count = 1 AND ct_tag_id = $tagId
433
                )";
434
435
            // Use tags in addition to the regex clause, if already present.
436
            // Tags are more reliable but may not be present for edits made with
437
            // older versions of the tool, before it started adding tags.
438
            if ('' === $condTool) {
439
                $condTool = $tagClause;
440
            } else {
441
                $condTool = '(' . $condTool . " OR $tagClause)";
442
            }
443
        }
444
445
        return [$condTool, $commentJoin, $tagJoin];
446
    }
447
448
    /**
449
     * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query.
450
     * @param Project $project
451
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
452
     * @param string|null $tool
453
     * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces.
454
     * @return array In the format: ['combined|regex', [1,2,3]] where the second element contains the tag IDs.
455
     */
456
    private function getToolRegexAndTags(
457
        Project $project,
458
        bool $nonAutoEdits = false,
459
        ?string $tool = null,
460
        $namespace = null
461
    ): array {
462
        $tools = $this->getTools($project);
463
        $regexes = [];
464
        $allTagIds = $this->getTags($project);
465
        $tagIds = [];
466
467
        if ('' != $tool) {
468
            $tools = [$tools[$tool]];
469
        }
470
471
        foreach (array_values($tools) as $values) {
472
            if ($nonAutoEdits && isset($values['contribs'])) {
473
                continue;
474
            }
475
476
            if (is_numeric($namespace) &&
477
                !empty($values['namespaces']) &&
478
                !in_array((int)$namespace, $values['namespaces'])
479
            ) {
480
                continue;
481
            }
482
483
            if (isset($values['regex'])) {
484
                $regexes[] = $values['regex'];
485
            }
486
            if (isset($values['tag']) && isset($allTagIds[$values['tag']])) {
487
                $tagIds[] = $allTagIds[$values['tag']];
488
            }
489
        }
490
491
        return [
492
            implode('|', $regexes),
493
            implode(',', $tagIds),
494
        ];
495
    }
496
497
    /**
498
     * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above.
499
     * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query.
500
     * @param Project $project
501
     * @return int[]
502
     */
503
    public function getTags(Project $project): array
504
    {
505
        // Set up cache.
506
        $cacheKey = $this->getCacheKey(func_get_args(), 'ae_tag_ids');
507
        if ($this->cache->hasItem($cacheKey)) {
508
            return $this->cache->getItem($cacheKey)->get();
509
        }
510
511
        $conn = $this->getProjectsConnection();
512
513
        // Get all tag values.
514
        $tags = [];
515
        foreach (array_values($this->getTools($project)) as $values) {
516
            if (isset($values['tag'])) {
517
                $tags[] = $conn->quote($values['tag'], \PDO::PARAM_STR);
518
            }
519
        }
520
521
        $tags = implode(',', $tags);
522
        $tagDefTable = $project->getTableName('change_tag_def');
523
        $sql = "SELECT ctd_name, ctd_id FROM $tagDefTable
524
                WHERE ctd_name IN ($tags)";
525
        $result = $this->executeProjectsQuery($sql)->fetchAll(\PDO::FETCH_KEY_PAIR);
526
527
        // Cache and return.
528
        return $this->setCache($cacheKey, $result);
529
    }
530
}
531