Test Failed
Pull Request — master (#372)
by MusikAnimal
12:26 queued 01:27
created

AutoEditsRepository::getToolCounts()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 34
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 17
nc 4
nop 5
dl 0
loc 34
rs 9.3888
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the AutoEditsRepository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Project;
11
use AppBundle\Model\User;
12
13
/**
14
 * AutoEditsRepository is responsible for retrieving data from the database
15
 * about the automated edits made by a user.
16
 * @codeCoverageIgnore
17
 */
18
class AutoEditsRepository extends UserRepository
19
{
20
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
21
    private $aeTools;
22
23
    /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */
24
    private $useSandbox;
25
26
    /** @var array Process cache for tags/IDs. */
27
    private $tags;
28
29
    /**
30
     * AutoEditsRepository constructor. Used solely to set $useSandbox (from AutomatedEditsController).
31
     * @param bool $useSandbox
32
     */
33
    public function __construct(bool $useSandbox = false)
34
    {
35
        $this->useSandbox = $useSandbox;
36
        parent::__construct();
37
    }
38
39
    /**
40
     * Method to give the repository access to the AutomatedEditsHelper and fetch the list of semi-automated tools.
41
     * @param Project $project
42
     * @param int|string $namespace Namespace ID or 'all'.
43
     * @return array
44
     */
45
    public function getTools(Project $project, $namespace = 'all'): array
46
    {
47
        if (!isset($this->aeTools)) {
48
            $this->aeTools = $this->container
49
                ->get('app.automated_edits_helper')
50
                ->getTools($project, $this->useSandbox);
51
        }
52
53
        if ('all' !== $namespace) {
54
            // Limit by namespace.
55
            return array_filter($this->aeTools, function (array $tool) use ($namespace) {
56
                return empty($tool['namespaces']) ||
57
                    in_array((int)$namespace, $tool['namespaces']) ||
58
                    (
59
                        1 === $namespace % 2 &&
60
                        isset($tool['talk_namespaces'])
61
                    );
62
            });
63
        }
64
65
        return $this->aeTools;
66
    }
67
68
    /**
69
     * Overrides Repository::setCache(), and will not call the parent (which sets the cache) if using the sandbox.
70
     * @inheritDoc
71
     */
72
    public function setCache(string $cacheKey, $value, $duration = 'PT20M')
73
    {
74
        if ($this->useSandbox) {
75
            return $value;
76
        } else {
77
            return parent::setCache($cacheKey, $value, $duration);
78
        }
79
    }
80
81
    /**
82
     * Get the number of edits this user made using semi-automated tools.
83
     * @param Project $project
84
     * @param User $user
85
     * @param string|int $namespace Namespace ID or 'all'
86
     * @param string $start Start date in a format accepted by strtotime()
87
     * @param string $end End date in a format accepted by strtotime()
88
     * @return int Result of query, see below.
89
     */
90
    public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = ''): int
91
    {
92
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
93
        if ($this->cache->hasItem($cacheKey) && !$this->useSandbox) {
94
            return $this->cache->getItem($cacheKey)->get();
95
        }
96
97
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
98
99
        // Get the combined regex and tags for the tools
100
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
101
102
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
103
104
        $revisionTable = $project->getTableName('revision');
105
        $commentTable = $project->getTableName('comment', 'revision');
106
        $tagTable = $project->getTableName('change_tag');
107
        $commentJoin = '';
108
        $tagJoin = '';
109
110
        $params = [];
111
112
        // Build SQL for detecting AutoEdits via regex and/or tags.
113
        $condTools = [];
114
        if ('' != $regex) {
115
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
116
            $condTools[] = "comment_text REGEXP :tools";
117
            $params['tools'] = $regex;
118
        }
119
        if ('' != $tagIds) {
120
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
121
            $condTools[] = "ct_tag_id IN ($tagIds)";
122
        }
123
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
124
125
        $sql = "SELECT COUNT(DISTINCT(rev_id))
126
                FROM $revisionTable
127
                $pageJoin
128
                $commentJoin
129
                $tagJoin
130
                WHERE rev_actor = :actorId
131
                $condNamespace
132
                $condTool
133
                $condBegin
134
                $condEnd";
135
136
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, $params);
137
        $result = (int)$resultQuery->fetchColumn();
138
139
        // Cache and return.
140
        return $this->setCache($cacheKey, $result);
141
    }
142
143
    /**
144
     * Get non-automated contributions for the given user.
145
     * @param Project $project
146
     * @param User $user
147
     * @param string|int $namespace Namespace ID or 'all'.
148
     * @param string $start Start date in a format accepted by strtotime().
149
     * @param string $end End date in a format accepted by strtotime().
150
     * @param int $offset Used for pagination, offset results by N edits.
151
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
152
     *   'length', 'length_change', 'comment'.
153
     */
154
    public function getNonAutomatedEdits(
155
        Project $project,
156
        User $user,
157
        $namespace = 'all',
158
        $start = '',
159
        $end = '',
160
        int $offset = 0
161
    ): array {
162
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
163
        if ($this->cache->hasItem($cacheKey) && !$this->useSandbox) {
164
            return $this->cache->getItem($cacheKey)->get();
165
        }
166
167
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
168
169
        // Get the combined regex and tags for the tools
170
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
171
172
        $pageTable = $project->getTableName('page');
173
        $revisionTable = $project->getTableName('revision');
174
        $commentTable = $project->getTableName('comment', 'revision');
175
        $tagTable = $project->getTableName('change_tag');
176
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
177
        $condTag = '' != $tagIds ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
178
            WHERE ct_rev_id = revs.rev_id AND ct_tag_id IN ($tagIds))" : '';
179
        $sql = "SELECT
180
                    page_title,
181
                    page_namespace,
182
                    revs.rev_id AS rev_id,
183
                    revs.rev_timestamp AS timestamp,
184
                    revs.rev_minor_edit AS minor,
185
                    revs.rev_len AS length,
186
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
187
                    comment_text AS comment
188
                FROM $pageTable
189
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
190
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
191
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
192
                WHERE revs.rev_actor = :actorId
193
                AND revs.rev_timestamp > 0
194
                AND comment_text NOT RLIKE :tools
195
                $condTag
196
                $condBegin
197
                $condEnd
198
                $condNamespace
199
                GROUP BY revs.rev_id
200
                ORDER BY revs.rev_timestamp DESC
201
                LIMIT 50
202
                OFFSET $offset";
203
204
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
205
        $result = $resultQuery->fetchAll();
206
207
        // Cache and return.
208
        return $this->setCache($cacheKey, $result);
209
    }
210
211
    /**
212
     * Get (semi-)automated contributions for the given user, and optionally for a given tool.
213
     * @param Project $project
214
     * @param User $user
215
     * @param string|int $namespace Namespace ID or 'all'.
216
     * @param string $start Start date in a format accepted by strtotime().
217
     * @param string $end End date in a format accepted by strtotime().
218
     * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config.
219
     * @param int $offset Used for pagination, offset results by N edits.
220
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
221
     *   'length', 'length_change', 'comment'.
222
     */
223
    public function getAutomatedEdits(
224
        Project $project,
225
        User $user,
226
        $namespace = 'all',
227
        $start = '',
228
        $end = '',
229
        ?string $tool = null,
230
        int $offset = 0
231
    ): array {
232
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
233
        if ($this->cache->hasItem($cacheKey) && !$this->useSandbox) {
234
            return $this->cache->getItem($cacheKey)->get();
235
        }
236
237
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
238
239
        // In this case there is a slight performance improvement we can make if we're not given a start date.
240
        if ('' == $condBegin && '' == $condEnd) {
241
            $condBegin = 'AND revs.rev_timestamp > 0';
242
        }
243
244
        // Get the combined regex and tags for the tools
245
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);
246
247
        $pageTable = $project->getTableName('page');
248
        $revisionTable = $project->getTableName('revision');
249
        $commentTable = $project->getTableName('comment', 'revision');
250
        $tagTable = $project->getTableName('change_tag');
251
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
252
        $tagJoin = '';
253
        $condsTool = [];
254
255
        if ('' != $regex) {
256
            $condsTool[] = 'comment_text RLIKE :tools';
257
        }
258
259
        if ('' != $tagIds) {
260
            $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
261
            $condsTool[] = "ct_tag_id IN ($tagIds)";
262
        }
263
264
        $sql = "SELECT
265
                    page_title,
266
                    page_namespace,
267
                    revs.rev_id AS rev_id,
268
                    revs.rev_timestamp AS timestamp,
269
                    revs.rev_minor_edit AS minor,
270
                    revs.rev_len AS length,
271
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
272
                    comment_text AS comment
273
                FROM $pageTable
274
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
275
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
276
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
277
                $tagJoin
278
                WHERE revs.rev_actor = :actorId
279
                $condBegin
280
                $condEnd
281
                $condNamespace
282
                AND (".implode(' OR ', $condsTool).")
283
                GROUP BY revs.rev_id
284
                ORDER BY revs.rev_timestamp DESC
285
                LIMIT 50
286
                OFFSET $offset";
287
288
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
289
        $result = $resultQuery->fetchAll();
290
291
        // Cache and return.
292
        return $this->setCache($cacheKey, $result);
293
    }
294
295
    /**
296
     * Get counts of known automated tools used by the given user.
297
     * @param Project $project
298
     * @param User $user
299
     * @param string|int $namespace Namespace ID or 'all'.
300
     * @param string $start Start date in a format accepted by strtotime().
301
     * @param string $end End date in a format accepted by strtotime().
302
     * @return string[] Each tool that they used along with the count and link:
303
     *                  [
304
     *                      'Twinkle' => [
305
     *                          'count' => 50,
306
     *                          'link' => 'Wikipedia:Twinkle',
307
     *                      ],
308
     *                  ]
309
     */
310
    public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = '', $end = ''): array
311
    {
312
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
313
        if ($this->cache->hasItem($cacheKey) && !$this->useSandbox) {
314
            return $this->cache->getItem($cacheKey)->get();
315
        }
316
317
        $sql = $this->getAutomatedCountsSql($project, $namespace, $start, $end);
318
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end);
319
320
        $tools = $this->getTools($project, $namespace);
321
322
        // handling results
323
        $results = [];
324
325
        while ($row = $resultQuery->fetch()) {
326
            // Only track tools that they've used at least once
327
            $tool = $row['toolname'];
328
            if ($row['count'] > 0) {
329
                $results[$tool] = [
330
                    'link' => $tools[$tool]['link'],
331
                    'label' => $tools[$tool]['label'] ?? $tool,
332
                    'count' => $row['count'],
333
                ];
334
            }
335
        }
336
337
        // Sort the array by count
338
        uasort($results, function ($a, $b) {
339
            return $b['count'] - $a['count'];
340
        });
341
342
        // Cache and return.
343
        return $this->setCache($cacheKey, $results);
344
    }
345
346
    /**
347
     * Get SQL for getting counts of known automated tools used by the given user.
348
     * @see self::getAutomatedCounts()
349
     * @param Project $project
350
     * @param string|int $namespace Namespace ID or 'all'.
351
     * @param string $start Start date in a format accepted by strtotime()
352
     * @param string $end End date in a format accepted by strtotime()
353
     * @return string The SQL.
354
     */
355
    private function getAutomatedCountsSql(Project $project, $namespace, $start, $end): string
356
    {
357
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
358
359
        // Load the semi-automated edit types.
360
        $tools = $this->getTools($project, $namespace);
361
362
        // Create a collection of queries that we're going to run.
363
        $queries = [];
364
365
        $revisionTable = $project->getTableName('revision');
366
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
367
        $conn = $this->getProjectsConnection();
368
369
        foreach ($tools as $toolName => $values) {
370
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
371
372
            $toolName = $conn->quote($toolName, \PDO::PARAM_STR);
373
374
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
375
            // configuration, but no local tag exists on the said project.
376
            if ('' === $condTool) {
377
                continue;
378
            }
379
380
            $queries[] .= "
381
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
382
                FROM $revisionTable
383
                $pageJoin
384
                $commentJoin
385
                $tagJoin
386
                WHERE rev_actor = :actorId
387
                AND $condTool
388
                $condNamespace
389
                $condBegin
390
                $condEnd";
391
        }
392
393
        // Combine to one big query.
394
        return implode(' UNION ', $queries);
395
    }
396
397
    /**
398
     * Get some of the inner SQL for self::getAutomatedCountsSql().
399
     * @param Project $project
400
     * @param string $toolName
401
     * @param string[] $values Values as defined in the AutoEdits config.
402
     * @return string[] [Equality clause, JOIN clause]
403
     */
404
    private function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
405
    {
406
        $conn = $this->getProjectsConnection();
407
        $commentJoin = '';
408
        $tagJoin = '';
409
        $condTool = '';
410
411
        if (isset($values['regex'])) {
412
            $commentTable = $project->getTableName('comment', 'revision');
413
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
414
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
415
            $condTool = "comment_text REGEXP $regex";
416
        }
417
        if (isset($values['tags'])) {
418
            $tagIds = $this->getTagIdsFromNames($project, $values['tags']);
0 ignored issues
show
Bug introduced by
$values['tags'] of type string is incompatible with the type array expected by parameter $tagNames of AppBundle\Repository\Aut...y::getTagIdsFromNames(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

418
            $tagIds = $this->getTagIdsFromNames($project, /** @scrutinizer ignore-type */ $values['tags']);
Loading history...
419
420
            if ($tagIds) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $tagIds of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
421
                $tagTable = $project->getTableName('change_tag');
422
                $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
423
                $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds);
424
425
                // Use tags in addition to the regex clause, if already present.
426
                // Tags are more reliable but may not be present for edits made with
427
                // older versions of the tool, before it started adding tags.
428
                if ('' === $condTool) {
429
                    $condTool = $tagClause;
430
                } else {
431
                    $condTool = "($condTool OR $tagClause)";
432
                }
433
            }
434
        }
435
436
        return [$condTool, $commentJoin, $tagJoin];
437
    }
438
439
    /**
440
     * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query.
441
     * @param Project $project
442
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
443
     * @param string|null $tool
444
     * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces.
445
     * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a
446
     *   comma-separated list of the tag IDs, ready to be used in SQL.
447
     */
448
    private function getToolRegexAndTags(
449
        Project $project,
450
        bool $nonAutoEdits = false,
451
        ?string $tool = null,
452
        $namespace = null
453
    ): array {
454
        $tools = $this->getTools($project);
455
        $regexes = [];
456
        $tagIds = [];
457
458
        if ('' != $tool) {
459
            $tools = [$tools[$tool]];
460
        }
461
462
        foreach (array_values($tools) as $values) {
463
            if ($nonAutoEdits && isset($values['contribs'])) {
464
                continue;
465
            }
466
467
            if (is_numeric($namespace) &&
468
                !empty($values['namespaces']) &&
469
                !in_array((int)$namespace, $values['namespaces'])
470
            ) {
471
                continue;
472
            }
473
474
            if (isset($values['regex'])) {
475
                $regexes[] = $values['regex'];
476
            }
477
            if (isset($values['tags'])) {
478
                $tagIds = array_merge($tagIds, $this->getTagIdsFromNames($project, $values['tags']));
479
            }
480
        }
481
482
        return [
483
            implode('|', $regexes),
484
            implode(',', $tagIds),
485
        ];
486
    }
487
488
    /**
489
     * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above.
490
     * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query.
491
     * @param Project $project
492
     * @return int[] Keys are the tag name, values are the IDs.
493
     */
494
    public function getTags(Project $project): array
495
    {
496
        // Use process cache; ensures we don't needlessly re-query for tag IDs
497
        // during the same request when using the ?usesandbox=1 option.
498
        if (isset($this->tags)) {
499
            return $this->tags;
500
        }
501
502
        $cacheKey = $this->getCacheKey(func_get_args(), 'ae_tag_ids');
503
        if ($this->cache->hasItem($cacheKey) && !$this->useSandbox) {
504
            return $this->cache->getItem($cacheKey)->get();
505
        }
506
507
        $conn = $this->getProjectsConnection();
508
509
        // Get all tag values.
510
        $tags = [];
511
        foreach (array_values($this->getTools($project)) as $values) {
512
            if (isset($values['tags'])) {
513
                $tags = array_merge(
514
                    $tags,
515
                    array_map(function ($tag) use ($conn) {
516
                        return $conn->quote($tag, \PDO::PARAM_STR);
517
                    }, $values['tags'])
518
                );
519
            }
520
        }
521
522
        $tags = implode(',', $tags);
523
        $tagDefTable = $project->getTableName('change_tag_def');
524
        $sql = "SELECT ctd_name, ctd_id FROM $tagDefTable
525
                WHERE ctd_name IN ($tags)";
526
        $this->tags = $this->executeProjectsQuery($sql)->fetchAll(\PDO::FETCH_KEY_PAIR);
527
528
        // Cache and return.
529
        return $this->setCache($cacheKey, $this->tags);
530
    }
531
532
    /**
533
     * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
534
     * For instance, Huggle edits are also tagged as Rollback, but when viewing
535
     * Rollback edits we don't want to show Huggle edits.
536
     * @param Project $project
537
     * @param string $tool
538
     * @param array $tagIds
539
     * @return string
540
     */
541
    private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
542
    {
543
        $tagsList = implode(',', $tagIds);
544
        $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];
545
        $excludesSql = '';
546
547
        if ($tagExcludes && 1 === count($tagIds)) {
548
            // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
549
            $excludesList = implode(',', array_filter(array_map(function ($tagName) use ($project) {
550
                return $this->getTags($project)[$tagName] ?? null;
551
            }, $tagExcludes)));
552
553
            if (strlen($excludesList)) {
554
                $excludesSql = "AND ct_tag_id NOT IN ($excludesList)";
555
            }
556
        }
557
558
        return "ct_tag_id IN ($tagsList) $excludesSql";
559
    }
560
561
    /**
562
     * Get IDs for tags given the names.
563
     * @param Project $project
564
     * @param array $tagNames
565
     * @return array
566
     */
567
    private function getTagIdsFromNames(Project $project, array $tagNames): array
568
    {
569
        $allTagIds = $this->getTags($project);
570
        $tagIds = [];
571
572
        foreach ($tagNames as $tag) {
573
            if (isset($allTagIds[$tag])) {
574
                $tagIds[] = $allTagIds[$tag];
575
            }
576
        }
577
578
        return $tagIds;
579
    }
580
}
581