Passed
Push — master ( db9227...2e7a9d )
by MusikAnimal
05:56
created

AutoEditsRepository::getAutomatedCountsSql()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 42
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 23
nc 3
nop 4
dl 0
loc 42
rs 9.552
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the AutoEditsRepository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Project;
11
use AppBundle\Model\User;
12
use Symfony\Component\Config\Definition\Exception\Exception;
13
14
/**
15
 * AutoEditsRepository is responsible for retrieving data from the database
16
 * about the automated edits made by a user.
17
 * @codeCoverageIgnore
18
 */
19
class AutoEditsRepository extends UserRepository
20
{
21
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
22
    private $aeTools;
23
24
    /**
25
     * Method to give the repository access to the AutomatedEditsHelper
26
     * and fetch the list of semi-automated tools.
27
     * @param Project $project
28
     * @return array
29
     */
30
    public function getTools(Project $project): array
31
    {
32
        if (!isset($this->aeTools)) {
33
            $this->aeTools = $this->container
34
                ->get('app.automated_edits_helper')
35
                ->getTools($project);
36
        }
37
        return $this->aeTools;
38
    }
39
40
    /**
41
     * Is the tag for given tool intended to be counted by itself?
42
     * For instance, when counting Rollback edits we don't want to also
43
     * count Huggle edits (which are tagged as Rollback).
44
     * @param Project $project
45
     * @param string|null $tool
46
     * @return bool
47
     */
48
    private function usesSingleTag(Project $project, ?string $tool): bool
49
    {
50
        return isset($this->getTools($project)[$tool]['single_tag']);
51
    }
52
53
    /**
54
     * Get the number of edits this user made using semi-automated tools.
55
     * @param Project $project
56
     * @param User $user
57
     * @param string|int $namespace Namespace ID or 'all'
58
     * @param string $start Start date in a format accepted by strtotime()
59
     * @param string $end End date in a format accepted by strtotime()
60
     * @return int Result of query, see below.
61
     */
62
    public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = ''): int
63
    {
64
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
65
        if ($this->cache->hasItem($cacheKey)) {
66
            return $this->cache->getItem($cacheKey)->get();
67
        }
68
69
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
70
71
        // Get the combined regex and tags for the tools
72
        [$regex, $tagIds] = $this->getToolRegexAndTags($project);
73
74
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
75
76
        $revisionTable = $project->getTableName('revision');
77
        $tagTable = $project->getTableName('change_tag');
78
        $tagJoin = '';
79
80
        $params = [];
81
82
        // Build SQL for detecting autoedits via regex and/or tags
83
        $condTools = [];
84
        if ('' != $regex) {
85
            $condTools[] = "rev_comment REGEXP :tools";
86
            $params['tools'] = $regex;
87
        }
88
        if ('' != $tagIds) {
89
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
90
            $condTools[] = "ct_tag_id IN ($tagIds)";
91
        }
92
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
93
94
        $sql = "SELECT COUNT(DISTINCT(rev_id))
95
                FROM $revisionTable
96
                $pageJoin
97
                $tagJoin
98
                WHERE rev_user_text = :username
99
                $condNamespace
100
                $condTool
101
                $condBegin
102
                $condEnd";
103
104
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end, $params);
105
        $result = (int)$resultQuery->fetchColumn();
106
107
        // Cache and return.
108
        return $this->setCache($cacheKey, $result);
109
    }
110
111
    /**
112
     * Get non-automated contributions for the given user.
113
     * @param Project $project
114
     * @param User $user
115
     * @param string|int $namespace Namespace ID or 'all'.
116
     * @param string $start Start date in a format accepted by strtotime().
117
     * @param string $end End date in a format accepted by strtotime().
118
     * @param int $offset Used for pagination, offset results by N edits.
119
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
120
     *   'length', 'length_change', 'comment'.
121
     */
122
    public function getNonAutomatedEdits(
123
        Project $project,
124
        User $user,
125
        $namespace = 'all',
126
        $start = '',
127
        $end = '',
128
        int $offset = 0
129
    ): array {
130
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
131
        if ($this->cache->hasItem($cacheKey)) {
132
            return $this->cache->getItem($cacheKey)->get();
133
        }
134
135
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
136
137
        // Get the combined regex and tags for the tools
138
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, true);
139
140
        $pageTable = $project->getTableName('page');
141
        $revisionTable = $project->getTableName('revision');
142
        $tagTable = $project->getTableName('change_tag');
143
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
144
        $condTag = '' != $tagIds ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
145
            WHERE ct_rev_id = revs.rev_id AND ct_tag_id IN ($tagIds))" : '';
146
        $sql = "SELECT
147
                    page_title,
148
                    page_namespace,
149
                    revs.rev_id AS rev_id,
150
                    revs.rev_timestamp AS timestamp,
151
                    revs.rev_minor_edit AS minor,
152
                    revs.rev_len AS length,
153
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
154
                    revs.rev_comment AS comment
155
                FROM $pageTable
156
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
157
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
158
                WHERE revs.rev_user_text = :username
159
                AND revs.rev_timestamp > 0
160
                AND revs.rev_comment NOT RLIKE :tools
161
                $condTag
162
                $condBegin
163
                $condEnd
164
                $condNamespace
165
                GROUP BY revs.rev_id
166
                ORDER BY revs.rev_timestamp DESC
167
                LIMIT 50
168
                OFFSET $offset";
169
170
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end, ['tools' => $regex]);
171
        $result = $resultQuery->fetchAll();
172
173
        // Cache and return.
174
        return $this->setCache($cacheKey, $result);
175
    }
176
177
    /**
178
     * Get (semi-)automated contributions for the given user, and optionally for a given tool.
179
     * @param Project $project
180
     * @param User $user
181
     * @param string|int $namespace Namespace ID or 'all'.
182
     * @param string $start Start date in a format accepted by strtotime().
183
     * @param string $end End date in a format accepted by strtotime().
184
     * @param string|null $tool Only get edits made with this tool. Must match the keys in semi_automated.yml.
185
     * @param int $offset Used for pagination, offset results by N edits.
186
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
187
     *   'length', 'length_change', 'comment'.
188
     */
189
    public function getAutomatedEdits(
190
        Project $project,
191
        User $user,
192
        $namespace = 'all',
193
        $start = '',
194
        $end = '',
195
        ?string $tool = null,
196
        int $offset = 0
197
    ): array {
198
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
199
        if ($this->cache->hasItem($cacheKey)) {
200
            return $this->cache->getItem($cacheKey)->get();
201
        }
202
203
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
204
205
        // In this case there is a slight performance improvement we can make if we're not given a start date.
206
        if ('' == $condBegin && '' == $condEnd) {
207
            $condBegin = 'AND revs.rev_timestamp > 0';
208
        }
209
210
        // Get the combined regex and tags for the tools
211
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);
212
213
        $pageTable = $project->getTableName('page');
214
        $revisionTable = $project->getTableName('revision');
215
        $tagTable = $project->getTableName('change_tag');
216
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
217
        $tagJoin = '' != $tagIds ? "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)" : '';
218
219
        $condTag = '';
220
        if ('' != $tagIds) {
221
            if ($this->usesSingleTag($project, $tool)) {
222
                // Only show edits made with the tool that don't overlap with other tools.
223
                // For instance, Huggle edits are also tagged as Rollback, but when viewing
224
                // Rollback edits we don't want to show Huggle edits.
225
                $condTag = "
226
                    OR EXISTS (
227
                        SELECT COUNT(ct_tag_id) AS tag_count
228
                        FROM $tagTable
229
                        WHERE ct_rev_id = revs.rev_id
230
                        HAVING tag_count = 1 AND ct_tag_id = $tagIds
231
                    )
232
                ";
233
            } else {
234
                $condTag = "OR ct_tag_id IN ($tagIds)";
235
            }
236
        }
237
238
        $sql = "SELECT
239
                    page_title,
240
                    page_namespace,
241
                    revs.rev_id AS rev_id,
242
                    revs.rev_timestamp AS timestamp,
243
                    revs.rev_minor_edit AS minor,
244
                    revs.rev_len AS length,
245
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
246
                    revs.rev_comment AS comment
247
                FROM $pageTable
248
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
249
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
250
                $tagJoin
251
                WHERE revs.rev_user_text = :username
252
                $condBegin
253
                $condEnd
254
                $condNamespace
255
                AND (
256
                    revs.rev_comment RLIKE :tools
257
                    $condTag
258
                )
259
                GROUP BY revs.rev_id
260
                ORDER BY revs.rev_timestamp DESC
261
                LIMIT 50
262
                OFFSET $offset";
263
264
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end, ['tools' => $regex]);
265
        $result = $resultQuery->fetchAll();
266
267
        // Cache and return.
268
        return $this->setCache($cacheKey, $result);
269
    }
270
271
    /**
272
     * Get counts of known automated tools used by the given user.
273
     * @param Project $project
274
     * @param User $user
275
     * @param string|int $namespace Namespace ID or 'all'.
276
     * @param string $start Start date in a format accepted by strtotime().
277
     * @param string $end End date in a format accepted by strtotime().
278
     * @return string[] Each tool that they used along with the count and link:
279
     *                  [
280
     *                      'Twinkle' => [
281
     *                          'count' => 50,
282
     *                          'link' => 'Wikipedia:Twinkle',
283
     *                      ],
284
     *                  ]
285
     */
286
    public function getToolCounts(
287
        Project $project,
288
        User $user,
289
        $namespace = 'all',
290
        $start = '',
291
        $end = ''
292
    ): array {
293
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
294
        if ($this->cache->hasItem($cacheKey)) {
295
            return $this->cache->getItem($cacheKey)->get();
296
        }
297
298
        $sql = $this->getAutomatedCountsSql($project, $namespace, $start, $end);
299
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
300
301
        $tools = $this->getTools($project);
302
303
        // handling results
304
        $results = [];
305
306
        while ($row = $resultQuery->fetch()) {
307
            // Only track tools that they've used at least once
308
            $tool = $row['toolname'];
309
            if ($row['count'] > 0) {
310
                $results[$tool] = [
311
                    'link' => $tools[$tool]['link'],
312
                    'label' => $tools[$tool]['label'] ?? $tool,
313
                    'count' => $row['count'],
314
                ];
315
            }
316
        }
317
318
        // Sort the array by count
319
        uasort($results, function ($a, $b) {
320
            return $b['count'] - $a['count'];
321
        });
322
323
        // Cache and return.
324
        return $this->setCache($cacheKey, $results);
325
    }
326
327
    /**
328
     * Get SQL for getting counts of known automated tools used by the given user.
329
     * @see self::getAutomatedCounts()
330
     * @param Project $project
331
     * @param string|int $namespace Namespace ID or 'all'.
332
     * @param string $start Start date in a format accepted by strtotime()
333
     * @param string $end End date in a format accepted by strtotime()
334
     * @return string The SQL.
335
     */
336
    private function getAutomatedCountsSql(Project $project, $namespace, $start, $end): string
337
    {
338
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
339
340
        // Load the semi-automated edit types.
341
        $tools = $this->getTools($project);
342
343
        // Create a collection of queries that we're going to run.
344
        $queries = [];
345
346
        $revisionTable = $project->getTableName('revision');
347
        $tagTable = $project->getTableName('change_tag');
348
349
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
350
351
        $conn = $this->getProjectsConnection();
352
353
        foreach ($tools as $toolname => $values) {
354
            [$condTool, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $tagTable, $values);
355
356
            $toolname = $conn->quote($toolname, \PDO::PARAM_STR);
357
358
            // Developer error, no regex or tag provided for this tool.
359
            if ('' === $condTool) {
360
                throw new Exception("No regex or tag found for the tool $toolname. " .
361
                    "Please verify this entry in semi_automated.yml");
362
            }
363
364
            $queries[] .= "
365
                SELECT $toolname AS toolname, COUNT(DISTINCT(rev_id)) AS count
366
                FROM $revisionTable
367
                $pageJoin
368
                $tagJoin
369
                WHERE rev_user_text = :username
370
                AND $condTool
371
                $condNamespace
372
                $condBegin
373
                $condEnd";
374
        }
375
376
        // Combine to one big query.
377
        return implode(' UNION ', $queries);
378
    }
379
380
    /**
381
     * Get some of the inner SQL for self::getAutomatedCountsSql().
382
     * @param Project $project
383
     * @param string $tagTable Name of the `change_tag` table.
384
     * @param string[] $values Values as defined in semi_automated.yml
385
     * @return string[] [Equality clause, JOIN clause]
386
     */
387
    private function getInnerAutomatedCountsSql(Project $project, string $tagTable, array $values): array
388
    {
389
        $conn = $this->getProjectsConnection();
390
        $tagJoin = '';
391
        $condTool = '';
392
393
        if (isset($values['regex'])) {
394
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
395
            $condTool = "rev_comment REGEXP $regex";
396
        }
397
        if (isset($values['tag']) && isset($this->getTags($project)[$values['tag']])) {
398
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
399
400
            $tagId = $this->getTags($project)[$values['tag']];
401
402
            // This ensures we count only edits made with the given tool, and not other
403
            // edits that incidentally have the same tag. For instance, Huggle edits
404
            // are also tagged as Rollback, but we want to make them mutually exclusive.
405
            $tagClause = "
406
                EXISTS (
407
                    SELECT COUNT(ct_tag_id) AS tag_count
408
                    FROM $tagTable
409
                    WHERE ct_rev_id = rev_id
410
                    HAVING tag_count = 1 AND ct_tag_id = $tagId
411
                )";
412
413
            // Use tags in addition to the regex clause, if already present.
414
            // Tags are more reliable but may not be present for edits made with
415
            // older versions of the tool, before it started adding tags.
416
            if ('' === $condTool) {
417
                $condTool = $tagClause;
418
            } else {
419
                $condTool = '(' . $condTool . " OR $tagClause)";
420
            }
421
        }
422
423
        return [$condTool, $tagJoin];
424
    }
425
426
    /**
427
     * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query.
428
     * @param Project $project
429
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
430
     * @param string|null $tool
431
     * @return array In the format: ['combined|regex', [1,2,3]] where the second element contains the tag IDs.
432
     */
433
    private function getToolRegexAndTags(Project $project, bool $nonAutoEdits = false, ?string $tool = null): array
434
    {
435
        $tools = $this->getTools($project);
436
        $regexes = [];
437
        $allTagIds = $this->getTags($project);
438
        $tagIds = [];
439
440
        if ('' != $tool) {
441
            $tools = [$tools[$tool]];
442
        }
443
444
        foreach (array_values($tools) as $values) {
445
            if ($nonAutoEdits && isset($values['contribs'])) {
446
                continue;
447
            }
448
449
            if (isset($values['regex'])) {
450
                $regexes[] = $values['regex'];
451
            }
452
            if (isset($values['tag']) && isset($allTagIds[$values['tag']])) {
453
                $tagIds[] = $allTagIds[$values['tag']];
454
            }
455
        }
456
457
        return [
458
            implode('|', $regexes),
459
            implode(',', $tagIds),
460
        ];
461
    }
462
463
    /**
464
     * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above.
465
     * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query.
466
     * @param Project $project
467
     * @return int[]
468
     */
469
    public function getTags(Project $project): array
470
    {
471
        // Set up cache.
472
        $cacheKey = $this->getCacheKey(func_get_args(), 'ae_tag_ids');
473
        if ($this->cache->hasItem($cacheKey)) {
474
            return $this->cache->getItem($cacheKey)->get();
475
        }
476
477
        $conn = $this->getProjectsConnection();
478
479
        // Get all tag values.
480
        $tags = [];
481
        foreach (array_values($this->getTools($project)) as $values) {
482
            if (isset($values['tag'])) {
483
                $tags[] = $conn->quote($values['tag'], \PDO::PARAM_STR);
484
            }
485
        }
486
487
        $tags = implode(',', $tags);
488
        $tagDefTable = $project->getTableName('change_tag_def');
489
        $sql = "SELECT ctd_name, ctd_id FROM $tagDefTable
490
                WHERE ctd_name IN ($tags)";
491
        $result = $this->executeProjectsQuery($sql)->fetchAll(\PDO::FETCH_KEY_PAIR);
492
493
        // Cache and return.
494
        return $this->setCache($cacheKey, $result);
495
    }
496
}
497