Passed
Pull Request — master (#373)
by MusikAnimal
11:46
created

AutoEditsRepository::getInvalidTools()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 4
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 6
rs 10
1
<?php
2
/**
3
 * This file contains only the AutoEditsRepository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Project;
11
use AppBundle\Model\User;
12
13
/**
14
 * AutoEditsRepository is responsible for retrieving data from the database
15
 * about the automated edits made by a user.
16
 * @codeCoverageIgnore
17
 */
18
class AutoEditsRepository extends UserRepository
19
{
20
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
21
    private $aeTools;
22
23
    /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */
24
    private $useSandbox;
25
26
    /** @var array Process cache for tags/IDs. */
27
    private $tags;
28
29
    /**
30
     * AutoEditsRepository constructor. Used solely to set $useSandbox (from AutomatedEditsController).
31
     * @param bool $useSandbox
32
     */
33
    public function __construct(bool $useSandbox = false)
34
    {
35
        parent::__construct();
36
        $this->useSandbox = $useSandbox;
37
    }
38
39
    /**
40
     * Method to give the repository access to the AutomatedEditsHelper and fetch the list of semi-automated tools.
41
     * @param Project $project
42
     * @param int|string $namespace Namespace ID or 'all'.
43
     * @return array
44
     */
45
    public function getTools(Project $project, $namespace = 'all'): array
46
    {
47
        if (!isset($this->aeTools)) {
48
            $this->aeTools = $this->container
49
                ->get('app.automated_edits_helper')
50
                ->getTools($project, $this->useSandbox);
51
        }
52
53
        if ('all' !== $namespace) {
54
            // Limit by namespace.
55
            return array_filter($this->aeTools, function (array $tool) use ($namespace) {
56
                return empty($tool['namespaces']) ||
57
                    in_array((int)$namespace, $tool['namespaces']) ||
58
                    (
59
                        1 === $namespace % 2 &&
60
                        isset($tool['talk_namespaces'])
61
                    );
62
            });
63
        }
64
65
        return $this->aeTools;
66
    }
67
68
    /**
69
     * Get tools that were misconfigured, also removing them from $this->aeTools.
70
     * @param Project $project
71
     * @return string[] Labels for the invalid tools.
72
     */
73
    public function getInvalidTools(Project $project): array
74
    {
75
        $tools = $this->getTools($project);
76
        $invalidTools = $tools['invalid'] ?? [];
77
        unset($this->aeTools['invalid']);
78
        return $invalidTools;
79
    }
80
81
    /**
82
     * Overrides Repository::setCache(), and will not call the parent (which sets the cache) if using the sandbox.
83
     * @inheritDoc
84
     */
85
    public function setCache(string $cacheKey, $value, $duration = 'PT20M')
86
    {
87
        if ($this->useSandbox) {
88
            return $value;
89
        }
90
91
        return parent::setCache($cacheKey, $value, $duration);
92
    }
93
94
    /**
95
     * Get the number of edits this user made using semi-automated tools.
96
     * @param Project $project
97
     * @param User $user
98
     * @param string|int $namespace Namespace ID or 'all'
99
     * @param string $start Start date in a format accepted by strtotime()
100
     * @param string $end End date in a format accepted by strtotime()
101
     * @return int Result of query, see below.
102
     */
103
    public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = ''): int
104
    {
105
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
106
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
107
            return $this->cache->getItem($cacheKey)->get();
108
        }
109
110
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
111
112
        // Get the combined regex and tags for the tools
113
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
114
115
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
116
117
        $revisionTable = $project->getTableName('revision');
118
        $commentTable = $project->getTableName('comment', 'revision');
119
        $tagTable = $project->getTableName('change_tag');
120
        $commentJoin = '';
121
        $tagJoin = '';
122
123
        $params = [];
124
125
        // Build SQL for detecting AutoEdits via regex and/or tags.
126
        $condTools = [];
127
        if ('' != $regex) {
128
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
129
            $condTools[] = "comment_text REGEXP :tools";
130
            $params['tools'] = $regex;
131
        }
132
        if ('' != $tagIds) {
133
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
134
            $condTools[] = "ct_tag_id IN ($tagIds)";
135
        }
136
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
137
138
        $sql = "SELECT COUNT(DISTINCT(rev_id))
139
                FROM $revisionTable
140
                $pageJoin
141
                $commentJoin
142
                $tagJoin
143
                WHERE rev_actor = :actorId
144
                $condNamespace
145
                $condTool
146
                $condBegin
147
                $condEnd";
148
149
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, $params);
150
        $result = (int)$resultQuery->fetchColumn();
151
152
        // Cache and return.
153
        return $this->setCache($cacheKey, $result);
154
    }
155
156
    /**
157
     * Get non-automated contributions for the given user.
158
     * @param Project $project
159
     * @param User $user
160
     * @param string|int $namespace Namespace ID or 'all'.
161
     * @param string $start Start date in a format accepted by strtotime().
162
     * @param string $end End date in a format accepted by strtotime().
163
     * @param int $offset Used for pagination, offset results by N edits.
164
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
165
     *   'length', 'length_change', 'comment'.
166
     */
167
    public function getNonAutomatedEdits(
168
        Project $project,
169
        User $user,
170
        $namespace = 'all',
171
        $start = '',
172
        $end = '',
173
        int $offset = 0
174
    ): array {
175
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
176
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
177
            return $this->cache->getItem($cacheKey)->get();
178
        }
179
180
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
181
182
        // Get the combined regex and tags for the tools
183
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
184
185
        $pageTable = $project->getTableName('page');
186
        $revisionTable = $project->getTableName('revision');
187
        $commentTable = $project->getTableName('comment', 'revision');
188
        $tagTable = $project->getTableName('change_tag');
189
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
190
        $condTag = '' != $tagIds ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
191
            WHERE ct_rev_id = revs.rev_id AND ct_tag_id IN ($tagIds))" : '';
192
        $sql = "SELECT
193
                    page_title,
194
                    page_namespace,
195
                    revs.rev_id AS rev_id,
196
                    revs.rev_timestamp AS timestamp,
197
                    revs.rev_minor_edit AS minor,
198
                    revs.rev_len AS length,
199
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
200
                    comment_text AS comment
201
                FROM $pageTable
202
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
203
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
204
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
205
                WHERE revs.rev_actor = :actorId
206
                AND revs.rev_timestamp > 0
207
                AND comment_text NOT RLIKE :tools
208
                $condTag
209
                $condBegin
210
                $condEnd
211
                $condNamespace
212
                GROUP BY revs.rev_id
213
                ORDER BY revs.rev_timestamp DESC
214
                LIMIT 50
215
                OFFSET $offset";
216
217
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
218
        $result = $resultQuery->fetchAll();
219
220
        // Cache and return.
221
        return $this->setCache($cacheKey, $result);
222
    }
223
224
    /**
225
     * Get (semi-)automated contributions for the given user, and optionally for a given tool.
226
     * @param Project $project
227
     * @param User $user
228
     * @param string|int $namespace Namespace ID or 'all'.
229
     * @param string $start Start date in a format accepted by strtotime().
230
     * @param string $end End date in a format accepted by strtotime().
231
     * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config.
232
     * @param int $offset Used for pagination, offset results by N edits.
233
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
234
     *   'length', 'length_change', 'comment'.
235
     */
236
    public function getAutomatedEdits(
237
        Project $project,
238
        User $user,
239
        $namespace = 'all',
240
        $start = '',
241
        $end = '',
242
        ?string $tool = null,
243
        int $offset = 0
244
    ): array {
245
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
246
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
247
            return $this->cache->getItem($cacheKey)->get();
248
        }
249
250
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
251
252
        // In this case there is a slight performance improvement we can make if we're not given a start date.
253
        if ('' == $condBegin && '' == $condEnd) {
254
            $condBegin = 'AND revs.rev_timestamp > 0';
255
        }
256
257
        // Get the combined regex and tags for the tools
258
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);
259
260
        $pageTable = $project->getTableName('page');
261
        $revisionTable = $project->getTableName('revision');
262
        $commentTable = $project->getTableName('comment', 'revision');
263
        $tagTable = $project->getTableName('change_tag');
264
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
265
        $tagJoin = '';
266
        $condsTool = [];
267
268
        if ('' != $regex) {
269
            $condsTool[] = 'comment_text RLIKE :tools';
270
        }
271
272
        if ('' != $tagIds) {
273
            $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
274
            $condsTool[] = "ct_tag_id IN ($tagIds)";
275
        }
276
277
        $sql = "SELECT
278
                    page_title,
279
                    page_namespace,
280
                    revs.rev_id AS rev_id,
281
                    revs.rev_timestamp AS timestamp,
282
                    revs.rev_minor_edit AS minor,
283
                    revs.rev_len AS length,
284
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
285
                    comment_text AS comment
286
                FROM $pageTable
287
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
288
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
289
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
290
                $tagJoin
291
                WHERE revs.rev_actor = :actorId
292
                $condBegin
293
                $condEnd
294
                $condNamespace
295
                AND (".implode(' OR ', $condsTool).")
296
                GROUP BY revs.rev_id
297
                ORDER BY revs.rev_timestamp DESC
298
                LIMIT 50
299
                OFFSET $offset";
300
301
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
302
        $result = $resultQuery->fetchAll();
303
304
        // Cache and return.
305
        return $this->setCache($cacheKey, $result);
306
    }
307
308
    /**
309
     * Get counts of known automated tools used by the given user.
310
     * @param Project $project
311
     * @param User $user
312
     * @param string|int $namespace Namespace ID or 'all'.
313
     * @param string $start Start date in a format accepted by strtotime().
314
     * @param string $end End date in a format accepted by strtotime().
315
     * @return string[] Each tool that they used along with the count and link:
316
     *                  [
317
     *                      'Twinkle' => [
318
     *                          'count' => 50,
319
     *                          'link' => 'Wikipedia:Twinkle',
320
     *                      ],
321
     *                  ]
322
     */
323
    public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = '', $end = ''): array
324
    {
325
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
326
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
327
            return $this->cache->getItem($cacheKey)->get();
328
        }
329
330
        $sql = $this->getAutomatedCountsSql($project, $namespace, $start, $end);
331
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end);
332
333
        $tools = $this->getTools($project, $namespace);
334
335
        // handling results
336
        $results = [];
337
338
        while ($row = $resultQuery->fetch()) {
339
            // Only track tools that they've used at least once
340
            $tool = $row['toolname'];
341
            if ($row['count'] > 0) {
342
                $results[$tool] = [
343
                    'link' => $tools[$tool]['link'],
344
                    'label' => $tools[$tool]['label'] ?? $tool,
345
                    'count' => $row['count'],
346
                ];
347
            }
348
        }
349
350
        // Sort the array by count
351
        uasort($results, function ($a, $b) {
352
            return $b['count'] - $a['count'];
353
        });
354
355
        // Cache and return.
356
        return $this->setCache($cacheKey, $results);
357
    }
358
359
    /**
360
     * Get SQL for getting counts of known automated tools used by the user.
361
     * @see self::getAutomatedCounts()
362
     * @param Project $project
363
     * @param string|int $namespace Namespace ID or 'all'.
364
     * @param string $start Start date in a format accepted by strtotime()
365
     * @param string $end End date in a format accepted by strtotime()
366
     * @return string The SQL.
367
     */
368
    private function getAutomatedCountsSql(Project $project, $namespace, $start, $end): string
369
    {
370
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
371
372
        // Load the semi-automated edit types.
373
        $tools = $this->getTools($project, $namespace);
374
375
        // Create a collection of queries that we're going to run.
376
        $queries = [];
377
378
        $revisionTable = $project->getTableName('revision');
379
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
380
        $conn = $this->getProjectsConnection();
381
382
        foreach ($tools as $toolName => $values) {
383
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
384
385
            $toolName = $conn->quote($toolName, \PDO::PARAM_STR);
386
387
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
388
            // configuration, but no local tag exists on the said project.
389
            if ('' === $condTool) {
390
                continue;
391
            }
392
393
            $queries[] .= "
394
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
395
                FROM $revisionTable
396
                $pageJoin
397
                $commentJoin
398
                $tagJoin
399
                WHERE rev_actor = :actorId
400
                AND $condTool
401
                $condNamespace
402
                $condBegin
403
                $condEnd";
404
        }
405
406
        // Combine to one big query.
407
        return implode(' UNION ', $queries);
408
    }
409
410
    /**
411
     * Get some of the inner SQL for self::getAutomatedCountsSql().
412
     * @param Project $project
413
     * @param string $toolName
414
     * @param string[] $values Values as defined in the AutoEdits config.
415
     * @return string[] [Equality clause, JOIN clause]
416
     */
417
    private function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
418
    {
419
        $conn = $this->getProjectsConnection();
420
        $commentJoin = '';
421
        $tagJoin = '';
422
        $condTool = '';
423
424
        if (isset($values['regex'])) {
425
            $commentTable = $project->getTableName('comment', 'revision');
426
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
427
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
428
            $condTool = "comment_text REGEXP $regex";
429
        }
430
        if (isset($values['tags'])) {
431
            $tagIds = $this->getTagIdsFromNames($project, $values['tags']);
0 ignored issues
show
Bug introduced by
$values['tags'] of type string is incompatible with the type array expected by parameter $tagNames of AppBundle\Repository\Aut...y::getTagIdsFromNames(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

431
            $tagIds = $this->getTagIdsFromNames($project, /** @scrutinizer ignore-type */ $values['tags']);
Loading history...
432
433
            if ($tagIds) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $tagIds of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
434
                $tagTable = $project->getTableName('change_tag');
435
                $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
436
                $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds);
437
438
                // Use tags in addition to the regex clause, if already present.
439
                // Tags are more reliable but may not be present for edits made with
440
                // older versions of the tool, before it started adding tags.
441
                if ('' === $condTool) {
442
                    $condTool = $tagClause;
443
                } else {
444
                    $condTool = "($condTool OR $tagClause)";
445
                }
446
            }
447
        }
448
449
        return [$condTool, $commentJoin, $tagJoin];
450
    }
451
452
    /**
453
     * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query.
454
     * @param Project $project
455
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
456
     * @param string|null $tool
457
     * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces.
458
     * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a
459
     *   comma-separated list of the tag IDs, ready to be used in SQL.
460
     */
461
    private function getToolRegexAndTags(
462
        Project $project,
463
        bool $nonAutoEdits = false,
464
        ?string $tool = null,
465
        $namespace = null
466
    ): array {
467
        $tools = $this->getTools($project);
468
        $regexes = [];
469
        $tagIds = [];
470
471
        if ('' != $tool) {
472
            $tools = [$tools[$tool]];
473
        }
474
475
        foreach (array_values($tools) as $values) {
476
            if ($nonAutoEdits && isset($values['contribs'])) {
477
                continue;
478
            }
479
480
            if (is_numeric($namespace) &&
481
                !empty($values['namespaces']) &&
482
                !in_array((int)$namespace, $values['namespaces'])
483
            ) {
484
                continue;
485
            }
486
487
            if (isset($values['regex'])) {
488
                $regexes[] = $values['regex'];
489
            }
490
            if (isset($values['tags'])) {
491
                $tagIds = array_merge($tagIds, $this->getTagIdsFromNames($project, $values['tags']));
492
            }
493
        }
494
495
        return [
496
            implode('|', $regexes),
497
            implode(',', $tagIds),
498
        ];
499
    }
500
501
    /**
502
     * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above.
503
     * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query.
504
     * @param Project $project
505
     * @return int[] Keys are the tag name, values are the IDs.
506
     */
507
    public function getTags(Project $project): array
508
    {
509
        // Use process cache; ensures we don't needlessly re-query for tag IDs
510
        // during the same request when using the ?usesandbox=1 option.
511
        if (isset($this->tags)) {
512
            return $this->tags;
513
        }
514
515
        $cacheKey = $this->getCacheKey(func_get_args(), 'ae_tag_ids');
516
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
517
            return $this->cache->getItem($cacheKey)->get();
518
        }
519
520
        $conn = $this->getProjectsConnection();
521
522
        // Get all tag values.
523
        $tags = [];
524
        foreach (array_values($this->getTools($project)) as $values) {
525
            if (isset($values['tags'])) {
526
                $tags = array_merge(
527
                    $tags,
528
                    array_map(function ($tag) use ($conn) {
529
                        return $conn->quote($tag, \PDO::PARAM_STR);
530
                    }, $values['tags'])
531
                );
532
            }
533
        }
534
535
        $tags = implode(',', $tags);
536
        $tagDefTable = $project->getTableName('change_tag_def');
537
        $sql = "SELECT ctd_name, ctd_id FROM $tagDefTable
538
                WHERE ctd_name IN ($tags)";
539
        $this->tags = $this->executeProjectsQuery($sql)->fetchAll(\PDO::FETCH_KEY_PAIR);
540
541
        // Cache and return.
542
        return $this->setCache($cacheKey, $this->tags);
543
    }
544
545
    /**
546
     * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
547
     * For instance, Huggle edits are also tagged as Rollback, but when viewing
548
     * Rollback edits we don't want to show Huggle edits.
549
     * @param Project $project
550
     * @param string $tool
551
     * @param array $tagIds
552
     * @return string
553
     */
554
    private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
555
    {
556
        $tagsList = implode(',', $tagIds);
557
        $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];
558
        $excludesSql = '';
559
560
        if ($tagExcludes && 1 === count($tagIds)) {
561
            // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
562
            $excludesList = implode(',', array_filter(array_map(function ($tagName) use ($project) {
563
                return $this->getTags($project)[$tagName] ?? null;
564
            }, $tagExcludes)));
565
566
            if (strlen($excludesList)) {
567
                $excludesSql = "AND ct_tag_id NOT IN ($excludesList)";
568
            }
569
        }
570
571
        return "ct_tag_id IN ($tagsList) $excludesSql";
572
    }
573
574
    /**
575
     * Get IDs for tags given the names.
576
     * @param Project $project
577
     * @param array $tagNames
578
     * @return array
579
     */
580
    private function getTagIdsFromNames(Project $project, array $tagNames): array
581
    {
582
        $allTagIds = $this->getTags($project);
583
        $tagIds = [];
584
585
        foreach ($tagNames as $tag) {
586
            if (isset($allTagIds[$tag])) {
587
                $tagIds[] = $allTagIds[$tag];
588
            }
589
        }
590
591
        return $tagIds;
592
    }
593
}
594