Passed
Push — master ( a1ed3f...a64a0e )
by
unknown
05:21
created

AutoEditsRepository::getInnerAutomatedCountsSql()   A

Complexity

Conditions 4
Paths 6

Size

Total Lines 36

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
nc 6
nop 2
dl 0
loc 36
rs 9.344
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the AutoEditsRepository class.
4
 */
5
6
namespace Xtools;
7
8
use AppBundle\Helper\AutomatedEditsHelper;
9
use DateInterval;
10
use Symfony\Component\Config\Definition\Exception\Exception;
11
12
/**
13
 * AutoEditsRepository is responsible for retrieving data from the database
14
 * about the automated edits made by a user.
15
 * @codeCoverageIgnore
16
 */
17
class AutoEditsRepository extends UserRepository
18
{
19
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
20
    private $aeTools;
21
22
    /**
23
     * Method to give the repository access to the AutomatedEditsHelper
24
     * and fetch the list of semi-automated tools.
25
     * @param Project $project
26
     */
27
    private function getTools(Project $project)
28
    {
29
        if (!isset($this->aeTools)) {
30
            $this->aeTools = $this->container
31
                ->get('app.automated_edits_helper')
32
                ->getTools($project);
33
        }
34
        return $this->aeTools;
35
    }
36
37
    /**
38
     * Is the tag for given tool intended to be counted by itself?
39
     * For instance, when counting Rollback edits we don't want to also
40
     * count Huggle edits (which are tagged as Rollback).
41
     * @param  Project $project
42
     * @param  string $tool
43
     * @return bool
44
     */
45
    private function usesSingleTag(Project $project, $tool)
46
    {
47
        return isset($this->getTools($project)[$tool]['single_tag']);
48
    }
49
50
    /**
51
     * Get the number of edits this user made using semi-automated tools.
52
     * @param Project $project
53
     * @param User $user
54
     * @param string|int $namespace Namespace ID or 'all'
55
     * @param string $start Start date in a format accepted by strtotime()
56
     * @param string $end End date in a format accepted by strtotime()
57
     * @return int Result of query, see below.
58
     */
59
    public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = '')
60
    {
61
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
62
        if ($this->cache->hasItem($cacheKey)) {
63
            return $this->cache->getItem($cacheKey)->get();
64
        }
65
        $this->stopwatch->start($cacheKey, 'XTools');
66
67
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end);
68
69
        // Get the combined regex and tags for the tools
70
        list($regex, $tags) = $this->getToolRegexAndTags($project);
71
72
        list($pageJoin, $condNamespace) = $this->getPageAndNamespaceSql($project, $namespace);
73
74
        $revisionTable = $project->getTableName('revision');
75
        $tagTable = $project->getTableName('change_tag');
76
        $tagJoin = '';
77
78
        // Build SQL for detecting autoedits via regex and/or tags
79
        $condTools = [];
80
        if ($regex != '') {
81
            $condTools[] = "rev_comment REGEXP $regex";
82
        }
83
        if ($tags != '') {
84
            $tagJoin = $tags != '' ? "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id" : '';
85
            $condTools[] = "ct_tag IN ($tags)";
86
        }
87
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
88
89
        $sql = "SELECT COUNT(DISTINCT(rev_id))
90
                FROM $revisionTable
91
                $pageJoin
92
                $tagJoin
93
                WHERE rev_user_text = :username
94
                $condTool
95
                $condNamespace
96
                $condBegin
97
                $condEnd";
98
99
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
100
        $result = (int) $resultQuery->fetchColumn();
101
102
        // Cache and return.
103
        $this->stopwatch->stop($cacheKey);
104
        return $this->setCache($cacheKey, $result);
105
    }
106
107
    /**
108
     * Get non-automated contributions for the given user.
109
     * @param Project $project
110
     * @param User $user
111
     * @param string|int $namespace Namespace ID or 'all'
112
     * @param string $start Start date in a format accepted by strtotime()
113
     * @param string $end End date in a format accepted by strtotime()
114
     * @param int $offset Used for pagination, offset results by N edits
115
     * @return string[] Result of query, with columns 'page_title',
116
     *   'page_namespace', 'rev_id', 'timestamp', 'minor',
117
     *   'length', 'length_change', 'comment'
118
     */
119
    public function getNonAutomatedEdits(
120
        Project $project,
121
        User $user,
122
        $namespace = 'all',
123
        $start = '',
124
        $end = '',
125
        $offset = 0
126
    ) {
127
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
128
        if ($this->cache->hasItem($cacheKey)) {
129
            return $this->cache->getItem($cacheKey)->get();
130
        }
131
        $this->stopwatch->start($cacheKey, 'XTools');
132
133
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end, 'revs.');
134
135
        // Get the combined regex and tags for the tools
136
        list($regex, $tags) = $this->getToolRegexAndTags($project, true);
137
138
        $pageTable = $project->getTableName('page');
139
        $revisionTable = $project->getTableName('revision');
140
        $tagTable = $project->getTableName('change_tag');
141
        $condNamespace = $namespace === 'all' ? '' : 'AND page_namespace = :namespace';
142
        $condTag = $tags != '' ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
143
            WHERE ct_rev_id = revs.rev_id AND ct_tag IN ($tags))" : '';
144
        $sql = "SELECT
145
                    page_title,
146
                    page_namespace,
147
                    revs.rev_id AS rev_id,
148
                    revs.rev_timestamp AS timestamp,
149
                    revs.rev_minor_edit AS minor,
150
                    revs.rev_len AS length,
151
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
152
                    revs.rev_comment AS comment
153
                FROM $pageTable
154
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
155
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
156
                WHERE revs.rev_user_text = :username
157
                AND revs.rev_timestamp > 0
158
                AND revs.rev_comment NOT RLIKE $regex
159
                $condTag
160
                $condBegin
161
                $condEnd
162
                $condNamespace
163
                GROUP BY revs.rev_id
164
                ORDER BY revs.rev_timestamp DESC
165
                LIMIT 50
166
                OFFSET $offset";
167
168
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
169
        $result = $resultQuery->fetchAll();
170
171
        // Cache and return.
172
        $this->stopwatch->stop($cacheKey);
173
        return $this->setCache($cacheKey, $result);
174
    }
175
176
    /**
177
     * Get (semi-)automated contributions for the given user, and optionally for a given tool
178
     * @param Project $project
179
     * @param User $user
180
     * @param string|int $namespace Namespace ID or 'all'
181
     * @param string $start Start date in a format accepted by strtotime()
182
     * @param string $end End date in a format accepted by strtotime()
183
     * @param string|null $tool Only get edits made with this tool. Must match the keys in semi_automated.yml
184
     * @param int $offset Used for pagination, offset results by N edits
185
     * @return string[] Result of query, with columns 'page_title',
186
     *   'page_namespace', 'rev_id', 'timestamp', 'minor',
187
     *   'length', 'length_change', 'comment'
188
     */
189
    public function getAutomatedEdits(
190
        Project $project,
191
        User $user,
192
        $namespace = 'all',
193
        $start = '',
194
        $end = '',
195
        $tool = null,
196
        $offset = 0
197
    ) {
198
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
199
        if ($this->cache->hasItem($cacheKey)) {
200
            return $this->cache->getItem($cacheKey)->get();
201
        }
202
        $this->stopwatch->start($cacheKey, 'XTools');
203
204
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end, 'revs.');
205
206
        // Get the combined regex and tags for the tools
207
        list($regex, $tags) = $this->getToolRegexAndTags($project, false, $tool);
208
209
        $pageTable = $project->getTableName('page');
210
        $revisionTable = $project->getTableName('revision');
211
        $tagTable = $project->getTableName('change_tag');
212
        $condNamespace = $namespace === 'all' ? '' : 'AND page_namespace = :namespace';
213
        $tagJoin = $tags != '' ? "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)" : '';
214
215
        $condTag = '';
216
        if ($tags != '') {
217
            if ($this->usesSingleTag($project, $tool)) {
218
                // Only show edits made with the tool that don't overlap with other tools.
219
                // For instance, Huggle edits are also tagged as Rollback, but when viewing
220
                // Rollback edits we don't want to show Huggle edits.
221
                $condTag = "
222
                    OR EXISTS (
223
                        SELECT ct_tag, COUNT(ct_tag) AS tag_count
224
                        FROM $tagTable
225
                        WHERE ct_rev_id = revs.rev_id
226
                        HAVING tag_count = 1 AND ct_tag = $tags
227
                    )
228
                ";
229
            } else {
230
                $condTag = "OR ct_tag IN ($tags)";
231
            }
232
        }
233
234
        $sql = "SELECT
235
                    page_title,
236
                    page_namespace,
237
                    revs.rev_id AS rev_id,
238
                    revs.rev_timestamp AS timestamp,
239
                    revs.rev_minor_edit AS minor,
240
                    revs.rev_len AS length,
241
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
242
                    revs.rev_comment AS comment
243
                FROM $pageTable
244
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
245
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
246
                $tagJoin
247
                WHERE revs.rev_user_text = :username
248
                AND revs.rev_timestamp > 0
249
                $condBegin
250
                $condEnd
251
                $condNamespace
252
                AND (
253
                    revs.rev_comment RLIKE $regex
254
                    $condTag
255
                )
256
                GROUP BY revs.rev_id
257
                ORDER BY revs.rev_timestamp DESC
258
                LIMIT 50
259
                OFFSET $offset";
260
261
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
262
        $result = $resultQuery->fetchAll();
263
264
        // Cache and return.
265
        $this->stopwatch->stop($cacheKey);
266
        return $this->setCache($cacheKey, $result);
267
    }
268
269
    /**
270
     * Get counts of known automated tools used by the given user.
271
     * @param Project $project
272
     * @param User $user
273
     * @param string|int $namespace Namespace ID or 'all'.
274
     * @param string $start Start date in a format accepted by strtotime()
275
     * @param string $end End date in a format accepted by strtotime()
276
     * @return string[] Each tool that they used along with the count and link:
277
     *                  [
278
     *                      'Twinkle' => [
279
     *                          'count' => 50,
280
     *                          'link' => 'Wikipedia:Twinkle',
281
     *                      ],
282
     *                  ]
283
     */
284
    public function getToolCounts(
285
        Project $project,
286
        User $user,
287
        $namespace = 'all',
288
        $start = '',
289
        $end = ''
290
    ) {
291
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
292
        if ($this->cache->hasItem($cacheKey)) {
293
            return $this->cache->getItem($cacheKey)->get();
294
        }
295
        $this->stopwatch->start($cacheKey, 'XTools');
296
297
        $sql = $this->getAutomatedCountsSql($project, $namespace, $start, $end);
298
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
299
300
        $tools = $this->getTools($project);
301
302
        // handling results
303
        $results = [];
304
305
        while ($row = $resultQuery->fetch()) {
306
            // Only track tools that they've used at least once
307
            $tool = $row['toolname'];
308
            if ($row['count'] > 0) {
309
                $results[$tool] = [
310
                    'link' => $tools[$tool]['link'],
311
                    'label' => isset($tools[$tool]['label'])
312
                        ? $tools[$tool]['label']
313
                        : $tool,
314
                    'count' => $row['count'],
315
                ];
316
            }
317
        }
318
319
        // Sort the array by count
320
        uasort($results, function ($a, $b) {
321
            return $b['count'] - $a['count'];
322
        });
323
324
        // Cache and return.
325
        $this->stopwatch->stop($cacheKey);
326
        return $this->setCache($cacheKey, $results);
327
    }
328
329
    /**
330
     * Get SQL for getting counts of known automated tools used by the given user.
331
     * @see self::getAutomatedCounts()
332
     * @param Project $project
333
     * @param string|int $namespace Namespace ID or 'all'.
334
     * @param string $start Start date in a format accepted by strtotime()
335
     * @param string $end End date in a format accepted by strtotime()
336
     * @return string The SQL.
337
     */
338
    private function getAutomatedCountsSql(Project $project, $namespace, $start, $end)
339
    {
340
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end);
341
342
        // Load the semi-automated edit types.
343
        $tools = $this->getTools($project);
344
345
        // Create a collection of queries that we're going to run.
346
        $queries = [];
347
348
        $revisionTable = $project->getTableName('revision');
349
        $tagTable = $project->getTableName('change_tag');
350
351
        list($pageJoin, $condNamespace) = $this->getPageAndNamespaceSql($project, $namespace);
352
353
        $conn = $this->getProjectsConnection();
354
355
        foreach ($tools as $toolname => $values) {
356
            list($condTool, $tagJoin) = $this->getInnerAutomatedCountsSql($tagTable, $values);
357
358
            $toolname = $conn->quote($toolname, \PDO::PARAM_STR);
359
360
            // Developer error, no regex or tag provided for this tool.
361
            if ($condTool === '') {
362
                throw new Exception("No regex or tag found for the tool $toolname. " .
363
                    "Please verify this entry in semi_automated.yml");
364
            }
365
366
            $queries[] .= "
367
                SELECT $toolname AS toolname, COUNT(DISTINCT(rev_id)) AS count
368
                FROM $revisionTable
369
                $pageJoin
370
                $tagJoin
371
                WHERE rev_user_text = :username
372
                AND $condTool
373
                $condNamespace
374
                $condBegin
375
                $condEnd";
376
        }
377
378
        // Combine to one big query.
379
        return implode(' UNION ', $queries);
380
    }
381
382
    /**
383
     * Get some of the inner SQL for self::getAutomatedCountsSql().
384
     * @param  string $tagTable Name of the `change_tag` table.
385
     * @param  string[] $values Values as defined in semi_automated.yml
386
     * @return string[] [Equality clause, JOIN clause]
387
     */
388
    private function getInnerAutomatedCountsSql($tagTable, $values)
389
    {
390
        $conn = $this->getProjectsConnection();
391
        $tagJoin = '';
392
        $condTool = '';
393
394
        if (isset($values['regex'])) {
395
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
396
            $condTool = "rev_comment REGEXP $regex";
397
        }
398
        if (isset($values['tag'])) {
399
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
400
            $tag = $conn->quote($values['tag'], \PDO::PARAM_STR);
401
402
            // This ensures we count only edits made with the given tool, and not other
403
            // edits that incidentally have the same tag. For instance, Huggle edits
404
            // are also tagged as Rollback, but we want to make them mutually exclusive.
405
            $tagClause = "
406
                EXISTS (
407
                    SELECT ct_tag, COUNT(ct_tag) AS tag_count
408
                    FROM $tagTable
409
                    WHERE ct_rev_id = rev_id
410
                    HAVING tag_count = 1 AND ct_tag = $tag
411
                )";
412
413
            // Use tags in addition to the regex clause, if already present.
414
            // Tags are more reliable but may not be present for edits made with
415
            // older versions of the tool, before it started adding tags.
416
            if ($condTool === '') {
417
                $condTool = $tagClause;
418
            } else {
419
                $condTool = '(' . $condTool . " OR $tagClause)";
420
            }
421
        }
422
423
        return [$condTool, $tagJoin];
424
    }
425
426
    /**
427
     * Get the combined regex and tags for all semi-automated tools,
428
     * or the given tool, ready to be used in a query.
429
     * @param Project $project
430
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
431
     * @param string|null $tool
432
     * @return string[] In the format:
433
     *    ['combined|regex', 'combined,tags']
434
     */
435
    private function getToolRegexAndTags(Project $project, $nonAutoEdits = false, $tool = null)
436
    {
437
        $conn = $this->getProjectsConnection();
438
        $tools = $this->getTools($project);
439
        $regexes = [];
440
        $tags = [];
441
442
        if ($tool != '') {
443
            $tools = [$tools[$tool]];
444
        }
445
446
        foreach ($tools as $tool => $values) {
447
            if ($nonAutoEdits && isset($values['contribs'])) {
448
                continue;
449
            }
450
451
            if (isset($values['regex'])) {
452
                $regexes[] = $values['regex'];
453
            }
454
            if (isset($values['tag'])) {
455
                $tags[] = $conn->quote($values['tag'], \PDO::PARAM_STR);
456
            }
457
        }
458
459
        return [
460
            $conn->quote(implode('|', $regexes), \PDO::PARAM_STR),
461
            implode(',', $tags),
462
        ];
463
    }
464
}
465