Passed
Push — master ( 6f4298...fb4a6d )
by MusikAnimal
12:29
created

AutoEditsRepository::getInnerAutomatedCountsSql()   A

Complexity

Conditions 5
Paths 8

Size

Total Lines 33
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 5
eloc 20
c 2
b 0
f 0
nc 8
nop 3
dl 0
loc 33
rs 9.2888
1
<?php
2
/**
3
 * This file contains only the AutoEditsRepository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Project;
11
use AppBundle\Model\User;
12
13
/**
14
 * AutoEditsRepository is responsible for retrieving data from the database
15
 * about the automated edits made by a user.
16
 * @codeCoverageIgnore
17
 */
18
class AutoEditsRepository extends UserRepository
19
{
20
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
21
    private $aeTools;
22
23
    /**
24
     * Method to give the repository access to the AutomatedEditsHelper and fetch the list of semi-automated tools.
25
     * @param Project $project
26
     * @param int|string $namespace Namespace ID or 'all'.
27
     * @return array
28
     */
29
    public function getTools(Project $project, $namespace = 'all'): array
30
    {
31
        if (!isset($this->aeTools)) {
32
            $this->aeTools = $this->container
33
                ->get('app.automated_edits_helper')
34
                ->getTools($project);
35
        }
36
37
        if ('all' !== $namespace) {
38
            // Limit by namespace.
39
            return array_filter($this->aeTools, function (array $tool) use ($namespace) {
40
                return empty($tool['namespaces']) ||
41
                    in_array((int)$namespace, $tool['namespaces']) ||
42
                    (
43
                        1 === $namespace % 2 &&
44
                        isset($tool['talk_namespaces'])
45
                    );
46
            });
47
        }
48
49
        return $this->aeTools;
50
    }
51
52
    /**
53
     * Get the number of edits this user made using semi-automated tools.
54
     * @param Project $project
55
     * @param User $user
56
     * @param string|int $namespace Namespace ID or 'all'
57
     * @param string $start Start date in a format accepted by strtotime()
58
     * @param string $end End date in a format accepted by strtotime()
59
     * @return int Result of query, see below.
60
     */
61
    public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = ''): int
62
    {
63
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
64
        if ($this->cache->hasItem($cacheKey)) {
65
            return $this->cache->getItem($cacheKey)->get();
66
        }
67
68
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
69
70
        // Get the combined regex and tags for the tools
71
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
72
73
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
74
75
        $revisionTable = $project->getTableName('revision');
76
        $commentTable = $project->getTableName('comment', 'revision');
77
        $tagTable = $project->getTableName('change_tag');
78
        $commentJoin = '';
79
        $tagJoin = '';
80
81
        $params = [];
82
83
        // Build SQL for detecting AutoEdits via regex and/or tags.
84
        $condTools = [];
85
        if ('' != $regex) {
86
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
87
            $condTools[] = "comment_text REGEXP :tools";
88
            $params['tools'] = $regex;
89
        }
90
        if ('' != $tagIds) {
91
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
92
            $condTools[] = "ct_tag_id IN ($tagIds)";
93
        }
94
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
95
96
        $sql = "SELECT COUNT(DISTINCT(rev_id))
97
                FROM $revisionTable
98
                $pageJoin
99
                $commentJoin
100
                $tagJoin
101
                WHERE rev_actor = :actorId
102
                $condNamespace
103
                $condTool
104
                $condBegin
105
                $condEnd";
106
107
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, $params);
108
        $result = (int)$resultQuery->fetchColumn();
109
110
        // Cache and return.
111
        return $this->setCache($cacheKey, $result);
112
    }
113
114
    /**
115
     * Get non-automated contributions for the given user.
116
     * @param Project $project
117
     * @param User $user
118
     * @param string|int $namespace Namespace ID or 'all'.
119
     * @param string $start Start date in a format accepted by strtotime().
120
     * @param string $end End date in a format accepted by strtotime().
121
     * @param int $offset Used for pagination, offset results by N edits.
122
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
123
     *   'length', 'length_change', 'comment'.
124
     */
125
    public function getNonAutomatedEdits(
126
        Project $project,
127
        User $user,
128
        $namespace = 'all',
129
        $start = '',
130
        $end = '',
131
        int $offset = 0
132
    ): array {
133
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
134
        if ($this->cache->hasItem($cacheKey)) {
135
            return $this->cache->getItem($cacheKey)->get();
136
        }
137
138
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
139
140
        // Get the combined regex and tags for the tools
141
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
142
143
        $pageTable = $project->getTableName('page');
144
        $revisionTable = $project->getTableName('revision');
145
        $commentTable = $project->getTableName('comment', 'revision');
146
        $tagTable = $project->getTableName('change_tag');
147
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
148
        $condTag = '' != $tagIds ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
149
            WHERE ct_rev_id = revs.rev_id AND ct_tag_id IN ($tagIds))" : '';
150
        $sql = "SELECT
151
                    page_title,
152
                    page_namespace,
153
                    revs.rev_id AS rev_id,
154
                    revs.rev_timestamp AS timestamp,
155
                    revs.rev_minor_edit AS minor,
156
                    revs.rev_len AS length,
157
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
158
                    comment_text AS comment
159
                FROM $pageTable
160
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
161
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
162
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
163
                WHERE revs.rev_actor = :actorId
164
                AND revs.rev_timestamp > 0
165
                AND comment_text NOT RLIKE :tools
166
                $condTag
167
                $condBegin
168
                $condEnd
169
                $condNamespace
170
                GROUP BY revs.rev_id
171
                ORDER BY revs.rev_timestamp DESC
172
                LIMIT 50
173
                OFFSET $offset";
174
175
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
176
        $result = $resultQuery->fetchAll();
177
178
        // Cache and return.
179
        return $this->setCache($cacheKey, $result);
180
    }
181
182
    /**
183
     * Get (semi-)automated contributions for the given user, and optionally for a given tool.
184
     * @param Project $project
185
     * @param User $user
186
     * @param string|int $namespace Namespace ID or 'all'.
187
     * @param string $start Start date in a format accepted by strtotime().
188
     * @param string $end End date in a format accepted by strtotime().
189
     * @param string|null $tool Only get edits made with this tool. Must match the keys in semi_automated.yml.
190
     * @param int $offset Used for pagination, offset results by N edits.
191
     * @return string[] Result of query, with columns 'page_title', 'page_namespace', 'rev_id', 'timestamp', 'minor',
192
     *   'length', 'length_change', 'comment'.
193
     */
194
    public function getAutomatedEdits(
195
        Project $project,
196
        User $user,
197
        $namespace = 'all',
198
        $start = '',
199
        $end = '',
200
        ?string $tool = null,
201
        int $offset = 0
202
    ): array {
203
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
204
        if ($this->cache->hasItem($cacheKey)) {
205
            return $this->cache->getItem($cacheKey)->get();
206
        }
207
208
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end, 'revs.');
209
210
        // In this case there is a slight performance improvement we can make if we're not given a start date.
211
        if ('' == $condBegin && '' == $condEnd) {
212
            $condBegin = 'AND revs.rev_timestamp > 0';
213
        }
214
215
        // Get the combined regex and tags for the tools
216
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);
217
218
        $pageTable = $project->getTableName('page');
219
        $revisionTable = $project->getTableName('revision');
220
        $commentTable = $project->getTableName('comment', 'revision');
221
        $tagTable = $project->getTableName('change_tag');
222
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
223
        $tagJoin = '';
224
        $condsTool = [];
225
226
        if ('' != $regex) {
227
            $condsTool[] = 'comment_text RLIKE :tools';
228
        }
229
230
        if ('' != $tagIds) {
231
            $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
232
            $condsTool[] = "ct_tag_id IN ($tagIds)";
233
        }
234
235
        $sql = "SELECT
236
                    page_title,
237
                    page_namespace,
238
                    revs.rev_id AS rev_id,
239
                    revs.rev_timestamp AS timestamp,
240
                    revs.rev_minor_edit AS minor,
241
                    revs.rev_len AS length,
242
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
243
                    comment_text AS comment
244
                FROM $pageTable
245
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
246
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
247
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
248
                $tagJoin
249
                WHERE revs.rev_actor = :actorId
250
                $condBegin
251
                $condEnd
252
                $condNamespace
253
                AND (".implode(' OR ', $condsTool).")
254
                GROUP BY revs.rev_id
255
                ORDER BY revs.rev_timestamp DESC
256
                LIMIT 50
257
                OFFSET $offset";
258
259
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end, ['tools' => $regex]);
260
        $result = $resultQuery->fetchAll();
261
262
        // Cache and return.
263
        return $this->setCache($cacheKey, $result);
264
    }
265
266
    /**
267
     * Get counts of known automated tools used by the given user.
268
     * @param Project $project
269
     * @param User $user
270
     * @param string|int $namespace Namespace ID or 'all'.
271
     * @param string $start Start date in a format accepted by strtotime().
272
     * @param string $end End date in a format accepted by strtotime().
273
     * @return string[] Each tool that they used along with the count and link:
274
     *                  [
275
     *                      'Twinkle' => [
276
     *                          'count' => 50,
277
     *                          'link' => 'Wikipedia:Twinkle',
278
     *                      ],
279
     *                  ]
280
     */
281
    public function getToolCounts(
282
        Project $project,
283
        User $user,
284
        $namespace = 'all',
285
        $start = '',
286
        $end = ''
287
    ): array {
288
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
289
        if ($this->cache->hasItem($cacheKey)) {
290
            return $this->cache->getItem($cacheKey)->get();
291
        }
292
293
        $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end);
294
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $start, $end);
295
296
        $tools = $this->getTools($project, $namespace);
297
298
        // handling results
299
        $results = [];
300
301
        while ($row = $resultQuery->fetch()) {
302
            // Only track tools that they've used at least once
303
            $tool = $row['toolname'];
304
            if ($row['count'] > 0) {
305
                $results[$tool] = [
306
                    'link' => $tools[$tool]['link'],
307
                    'label' => $tools[$tool]['label'] ?? $tool,
308
                    'count' => $row['count'],
309
                ];
310
            }
311
        }
312
313
        // Sort the array by count
314
        uasort($results, function ($a, $b) {
315
            return $b['count'] - $a['count'];
316
        });
317
318
        // Cache and return.
319
        return $this->setCache($cacheKey, $results);
320
    }
321
322
    /**
323
     * Get SQL for getting counts of known automated tools used by the given user.
324
     * @see self::getAutomatedCounts()
325
     * @param Project $project
326
     * @param User $user
327
     * @param string|int $namespace Namespace ID or 'all'.
328
     * @param string $start Start date in a format accepted by strtotime()
329
     * @param string $end End date in a format accepted by strtotime()
330
     * @return string The SQL.
331
     */
332
    private function getAutomatedCountsSql(Project $project, User $user, $namespace, $start, $end): string
0 ignored issues
show
Unused Code introduced by
The parameter $user is not used and could be removed. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-unused  annotation

332
    private function getAutomatedCountsSql(Project $project, /** @scrutinizer ignore-unused */ User $user, $namespace, $start, $end): string

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
333
    {
334
        [$condBegin, $condEnd] = $this->getRevTimestampConditions($start, $end);
335
336
        // Load the semi-automated edit types.
337
        $tools = $this->getTools($project, $namespace);
338
339
        // Create a collection of queries that we're going to run.
340
        $queries = [];
341
342
        $revisionTable = $project->getTableName('revision');
343
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
344
        $conn = $this->getProjectsConnection();
345
346
        foreach ($tools as $toolName => $values) {
347
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
348
349
            $toolName = $conn->quote($toolName, \PDO::PARAM_STR);
350
351
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
352
            // configuration, but no local tag exists on the said project.
353
            if ('' === $condTool) {
354
                continue;
355
            }
356
357
            $queries[] .= "
358
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
359
                FROM $revisionTable
360
                $pageJoin
361
                $commentJoin
362
                $tagJoin
363
                WHERE rev_actor = :actorId
364
                AND $condTool
365
                $condNamespace
366
                $condBegin
367
                $condEnd";
368
        }
369
370
        // Combine to one big query.
371
        return implode(' UNION ', $queries);
372
    }
373
374
    /**
375
     * Get some of the inner SQL for self::getAutomatedCountsSql().
376
     * @param Project $project
377
     * @param string $toolName
378
     * @param string[] $values Values as defined in semi_automated.yml
379
     * @return string[] [Equality clause, JOIN clause]
380
     */
381
    private function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
382
    {
383
        $conn = $this->getProjectsConnection();
384
        $commentJoin = '';
385
        $tagJoin = '';
386
        $condTool = '';
387
388
        if (isset($values['regex'])) {
389
            $commentTable = $project->getTableName('comment', 'revision');
390
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
391
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
392
            $condTool = "comment_text REGEXP $regex";
393
        }
394
        if (isset($values['tags'])) {
395
            $tagIds = $this->getTagIdsFromNames($project, $values['tags']);
0 ignored issues
show
Bug introduced by
$values['tags'] of type string is incompatible with the type array expected by parameter $tagNames of AppBundle\Repository\Aut...y::getTagIdsFromNames(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

395
            $tagIds = $this->getTagIdsFromNames($project, /** @scrutinizer ignore-type */ $values['tags']);
Loading history...
396
397
            if ($tagIds) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $tagIds of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
398
                $tagTable = $project->getTableName('change_tag');
399
                $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
400
                $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds);
401
402
                // Use tags in addition to the regex clause, if already present.
403
                // Tags are more reliable but may not be present for edits made with
404
                // older versions of the tool, before it started adding tags.
405
                if ('' === $condTool) {
406
                    $condTool = $tagClause;
407
                } else {
408
                    $condTool = "($condTool OR $tagClause)";
409
                }
410
            }
411
        }
412
413
        return [$condTool, $commentJoin, $tagJoin];
414
    }
415
416
    /**
417
     * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query.
418
     * @param Project $project
419
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
420
     * @param string|null $tool
421
     * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces.
422
     * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a
423
     *   comma-separated list of the tag IDs, ready to be used in SQL.
424
     */
425
    private function getToolRegexAndTags(
426
        Project $project,
427
        bool $nonAutoEdits = false,
428
        ?string $tool = null,
429
        $namespace = null
430
    ): array {
431
        $tools = $this->getTools($project);
432
        $regexes = [];
433
        $tagIds = [];
434
435
        if ('' != $tool) {
436
            $tools = [$tools[$tool]];
437
        }
438
439
        foreach (array_values($tools) as $values) {
440
            if ($nonAutoEdits && isset($values['contribs'])) {
441
                continue;
442
            }
443
444
            if (is_numeric($namespace) &&
445
                !empty($values['namespaces']) &&
446
                !in_array((int)$namespace, $values['namespaces'])
447
            ) {
448
                continue;
449
            }
450
451
            if (isset($values['regex'])) {
452
                $regexes[] = $values['regex'];
453
            }
454
            if (isset($values['tags'])) {
455
                $tagIds = array_merge($tagIds, $this->getTagIdsFromNames($project, $values['tags']));
456
            }
457
        }
458
459
        return [
460
            implode('|', $regexes),
461
            implode(',', $tagIds),
462
        ];
463
    }
464
465
    /**
466
     * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above.
467
     * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query.
468
     * @param Project $project
469
     * @return int[] Keys are the tag name, values are the IDs.
470
     */
471
    public function getTags(Project $project): array
472
    {
473
        // Set up cache.
474
        $cacheKey = $this->getCacheKey(func_get_args(), 'ae_tag_ids');
475
        if ($this->cache->hasItem($cacheKey)) {
476
            return $this->cache->getItem($cacheKey)->get();
477
        }
478
479
        $conn = $this->getProjectsConnection();
480
481
        // Get all tag values.
482
        $tags = [];
483
        foreach (array_values($this->getTools($project)) as $values) {
484
            if (isset($values['tags'])) {
485
                $tags = array_merge(
486
                    $tags,
487
                    array_map(function ($tag) use ($conn) {
488
                        return $conn->quote($tag, \PDO::PARAM_STR);
489
                    }, $values['tags'])
490
                );
491
            }
492
        }
493
494
        $tags = implode(',', $tags);
495
        $tagDefTable = $project->getTableName('change_tag_def');
496
        $sql = "SELECT ctd_name, ctd_id FROM $tagDefTable
497
                WHERE ctd_name IN ($tags)";
498
        $result = $this->executeProjectsQuery($sql)->fetchAll(\PDO::FETCH_KEY_PAIR);
499
500
        // Cache and return.
501
        return $this->setCache($cacheKey, $result);
502
    }
503
504
    /**
505
     * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
506
     * For instance, Huggle edits are also tagged as Rollback, but when viewing
507
     * Rollback edits we don't want to show Huggle edits.
508
     * @param Project $project
509
     * @param string $tool
510
     * @param array $tagIds
511
     * @return string
512
     */
513
    private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
514
    {
515
        $tagsList = implode(',', $tagIds);
516
        $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];
517
        $excludesSql = '';
518
519
        if ($tagExcludes && 1 === count($tagIds)) {
520
            // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
521
            $excludesList = implode(',', array_filter(array_map(function ($tagName) use ($project) {
522
                return $this->getTags($project)[$tagName] ?? null;
523
            }, $tagExcludes)));
524
525
            if (strlen($excludesList)) {
526
                $excludesSql = "AND ct_tag_id NOT IN ($excludesList)";
527
            }
528
        }
529
530
        return "ct_tag_id IN ($tagsList) $excludesSql";
531
    }
532
533
    /**
534
     * Get IDs for tags given the names.
535
     * @param Project $project
536
     * @param array $tagNames
537
     * @return array
538
     */
539
    private function getTagIdsFromNames(Project $project, array $tagNames): array
540
    {
541
        $allTagIds = $this->getTags($project);
542
        $tagIds = [];
543
544
        foreach ($tagNames as $tag) {
545
            if (isset($allTagIds[$tag])) {
546
                $tagIds[] = $allTagIds[$tag];
547
            }
548
        }
549
550
        return $tagIds;
551
    }
552
}
553