Passed
Push — master ( dcfba0...0e0426 )
by MusikAnimal
04:50
created

AutoEditsRepository::countAutomatedEdits()   B

Complexity

Conditions 5
Paths 7

Size

Total Lines 46
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 29
nc 7
nop 5
dl 0
loc 46
rs 8.4751
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the AutoEditsRepository class.
4
 */
5
6
namespace Xtools;
7
8
use DateInterval;
9
use Symfony\Component\Config\Definition\Exception\Exception;
10
11
/**
12
 * AutoEditsRepository is responsible for retrieving data from the database
13
 * about the automated edits made by a user.
14
 * @codeCoverageIgnore
15
 */
16
class AutoEditsRepository extends UserRepository
17
{
18
    /**
19
     * Get the number of edits this user made using semi-automated tools.
20
     * @param Project $project
21
     * @param User $user
22
     * @param string|int $namespace Namespace ID or 'all'
23
     * @param string $start Start date in a format accepted by strtotime()
24
     * @param string $end End date in a format accepted by strtotime()
25
     * @return int Result of query, see below.
26
     */
27
    public function countAutomatedEdits(Project $project, User $user, $namespace = 'all', $start = '', $end = '')
28
    {
29
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
30
        if ($this->cache->hasItem($cacheKey)) {
31
            return $this->cache->getItem($cacheKey)->get();
32
        }
33
        $this->stopwatch->start($cacheKey, 'XTools');
34
35
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end);
36
37
        // Get the combined regex and tags for the tools
38
        list($regex, $tags) = $this->getToolRegexAndTags($project);
39
40
        list($pageJoin, $condNamespace) = $this->getPageAndNamespaceSql($project, $namespace);
41
42
        $revisionTable = $project->getTableName('revision');
43
        $tagTable = $project->getTableName('change_tag');
44
        $tagJoin = '';
45
46
        // Build SQL for detecting autoedits via regex and/or tags
47
        $condTools = [];
48
        if ($regex != '') {
49
            $condTools[] = "rev_comment REGEXP $regex";
50
        }
51
        if ($tags != '') {
52
            $tagJoin = $tags != '' ? "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id" : '';
53
            $condTools[] = "ct_tag IN ($tags)";
54
        }
55
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
56
57
        $sql = "SELECT COUNT(DISTINCT(rev_id))
58
                FROM $revisionTable
59
                $pageJoin
60
                $tagJoin
61
                WHERE rev_user_text = :username
62
                $condTool
63
                $condNamespace
64
                $condBegin
65
                $condEnd";
66
67
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
68
        $result = (int) $resultQuery->fetchColumn();
69
70
        // Cache and return.
71
        $this->stopwatch->stop($cacheKey);
72
        return $this->setCache($cacheKey, $result);
73
    }
74
75
    /**
76
     * Get non-automated contributions for the given user.
77
     * @param Project $project
78
     * @param User $user
79
     * @param string|int $namespace Namespace ID or 'all'
80
     * @param string $start Start date in a format accepted by strtotime()
81
     * @param string $end End date in a format accepted by strtotime()
82
     * @param int $offset Used for pagination, offset results by N edits
83
     * @return string[] Result of query, with columns 'page_title',
84
     *   'page_namespace', 'rev_id', 'timestamp', 'minor',
85
     *   'length', 'length_change', 'comment'
86
     */
87
    public function getNonAutomatedEdits(
88
        Project $project,
89
        User $user,
90
        $namespace = 'all',
91
        $start = '',
92
        $end = '',
93
        $offset = 0
94
    ) {
95
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
96
        if ($this->cache->hasItem($cacheKey)) {
97
            return $this->cache->getItem($cacheKey)->get();
98
        }
99
        $this->stopwatch->start($cacheKey, 'XTools');
100
101
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end, 'revs.');
102
103
        // Get the combined regex and tags for the tools
104
        list($regex, $tags) = $this->getToolRegexAndTags($project);
105
106
        $pageTable = $project->getTableName('page');
107
        $revisionTable = $project->getTableName('revision');
108
        $tagTable = $project->getTableName('change_tag');
109
        $condNamespace = $namespace === 'all' ? '' : 'AND page_namespace = :namespace';
110
        $tagJoin = $tags != '' ? "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)" : '';
111
        $condTag = $tags != '' ? "AND (ct_tag NOT IN ($tags) OR ct_tag IS NULL)" : '';
112
        $sql = "SELECT
113
                    page_title,
114
                    page_namespace,
115
                    revs.rev_id AS rev_id,
116
                    revs.rev_timestamp AS timestamp,
117
                    revs.rev_minor_edit AS minor,
118
                    revs.rev_len AS length,
119
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
120
                    revs.rev_comment AS comment
121
                FROM $pageTable
122
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
123
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
124
                $tagJoin
125
                WHERE revs.rev_user_text = :username
126
                AND revs.rev_timestamp > 0
127
                AND revs.rev_comment NOT RLIKE $regex
128
                $condTag
129
                $condBegin
130
                $condEnd
131
                $condNamespace
132
                GROUP BY revs.rev_id
133
                ORDER BY revs.rev_timestamp DESC
134
                LIMIT 50
135
                OFFSET $offset";
136
137
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
138
        $result = $resultQuery->fetchAll();
139
140
        // Cache and return.
141
        $this->stopwatch->stop($cacheKey);
142
        return $this->setCache($cacheKey, $result);
143
    }
144
145
    /**
146
     * Get (semi-)automated contributions for the given user, and optionally for a given tool
147
     * @param Project $project
148
     * @param User $user
149
     * @param string|int $namespace Namespace ID or 'all'
150
     * @param string $start Start date in a format accepted by strtotime()
151
     * @param string $end End date in a format accepted by strtotime()
152
     * @param string|null $tool Only get edits made with this tool. Must match the keys in semi_automated.yml
153
     * @param int $offset Used for pagination, offset results by N edits
154
     * @return string[] Result of query, with columns 'page_title',
155
     *   'page_namespace', 'rev_id', 'timestamp', 'minor',
156
     *   'length', 'length_change', 'comment'
157
     */
158
    public function getAutomatedEdits(
159
        Project $project,
160
        User $user,
161
        $namespace = 'all',
162
        $start = '',
163
        $end = '',
164
        $tool = null,
165
        $offset = 0
166
    ) {
167
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
168
        if ($this->cache->hasItem($cacheKey)) {
169
            return $this->cache->getItem($cacheKey)->get();
170
        }
171
        $this->stopwatch->start($cacheKey, 'XTools');
172
173
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end, 'revs.');
174
175
        // Get the combined regex and tags for the tools
176
        list($regex, $tags) = $this->getToolRegexAndTags($project, $tool);
177
178
        $pageTable = $project->getTableName('page');
179
        $revisionTable = $project->getTableName('revision');
180
        $tagTable = $project->getTableName('change_tag');
181
        $condNamespace = $namespace === 'all' ? '' : 'AND page_namespace = :namespace';
182
        $tagJoin = $tags != '' ? "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)" : '';
183
        $condTag = $tags != '' ? "AND (ct_tag NOT IN ($tags) OR ct_tag IS NULL)" : '';
184
        $sql = "SELECT
185
                    page_title,
186
                    page_namespace,
187
                    revs.rev_id AS rev_id,
188
                    revs.rev_timestamp AS timestamp,
189
                    revs.rev_minor_edit AS minor,
190
                    revs.rev_len AS length,
191
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
192
                    revs.rev_comment AS comment
193
                FROM $pageTable
194
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
195
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
196
                $tagJoin
197
                WHERE revs.rev_user_text = :username
198
                AND revs.rev_timestamp > 0
199
                AND revs.rev_comment RLIKE $regex
200
                $condTag
201
                $condBegin
202
                $condEnd
203
                $condNamespace
204
                GROUP BY revs.rev_id
205
                ORDER BY revs.rev_timestamp DESC
206
                LIMIT 50
207
                OFFSET $offset";
208
209
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
210
        $result = $resultQuery->fetchAll();
211
212
        // Cache and return.
213
        $this->stopwatch->stop($cacheKey);
214
        return $this->setCache($cacheKey, $result);
215
    }
216
217
    /**
218
     * Get counts of known automated tools used by the given user.
219
     * @param Project $project
220
     * @param User $user
221
     * @param string|int $namespace Namespace ID or 'all'.
222
     * @param string $start Start date in a format accepted by strtotime()
223
     * @param string $end End date in a format accepted by strtotime()
224
     * @return string[] Each tool that they used along with the count and link:
225
     *                  [
226
     *                      'Twinkle' => [
227
     *                          'count' => 50,
228
     *                          'link' => 'Wikipedia:Twinkle',
229
     *                      ],
230
     *                  ]
231
     */
232
    public function getToolCounts(
233
        Project $project,
234
        User $user,
235
        $namespace = 'all',
236
        $start = '',
237
        $end = ''
238
    ) {
239
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
240
        if ($this->cache->hasItem($cacheKey)) {
241
            return $this->cache->getItem($cacheKey)->get();
242
        }
243
        $this->stopwatch->start($cacheKey, 'XTools');
244
245
        $sql = $this->getAutomatedCountsSql($project, $namespace, $start, $end);
246
        $resultQuery = $this->executeQuery($sql, $user, $namespace, $start, $end);
247
248
        $automatedEditsHelper = $this->container->get('app.automated_edits_helper');
249
        $tools = $automatedEditsHelper->getTools($project);
250
251
        // handling results
252
        $results = [];
253
254
        while ($row = $resultQuery->fetch()) {
255
            // Only track tools that they've used at least once
256
            $tool = $row['toolname'];
257
            if ($row['count'] > 0) {
258
                $results[$tool] = [
259
                    'link' => $tools[$tool]['link'],
260
                    'label' => isset($tools[$tool]['label'])
261
                        ? $tools[$tool]['label']
262
                        : $tool,
263
                    'count' => $row['count'],
264
                ];
265
            }
266
        }
267
268
        // Sort the array by count
269
        uasort($results, function ($a, $b) {
270
            return $b['count'] - $a['count'];
271
        });
272
273
        // Cache and return.
274
        $this->stopwatch->stop($cacheKey);
275
        return $this->setCache($cacheKey, $results);
276
    }
277
278
    /**
279
     * Get SQL for getting counts of known automated tools used by the given user.
280
     * @see self::getAutomatedCounts()
281
     * @param Project $project
282
     * @param string|int $namespace Namespace ID or 'all'.
283
     * @param string $start Start date in a format accepted by strtotime()
284
     * @param string $end End date in a format accepted by strtotime()
285
     * @return string The SQL.
286
     */
287
    private function getAutomatedCountsSql(Project $project, $namespace, $start, $end)
288
    {
289
        list($condBegin, $condEnd) = $this->getRevTimestampConditions($start, $end);
290
291
        // Load the semi-automated edit types.
292
        $automatedEditsHelper = $this->container->get('app.automated_edits_helper');
293
        $tools = $automatedEditsHelper->getTools($project);
294
295
        // Create a collection of queries that we're going to run.
296
        $queries = [];
297
298
        $revisionTable = $project->getTableName('revision');
299
        $tagTable = $project->getTableName('change_tag');
300
301
        list($pageJoin, $condNamespace) = $this->getPageAndNamespaceSql($project, $namespace);
302
303
        $conn = $this->getProjectsConnection();
304
305
        foreach ($tools as $toolname => $values) {
306
            list($condTool, $tagJoin) = $this->getInnerAutomatedCountsSql($tagTable, $values);
307
308
            $toolname = $conn->quote($toolname, \PDO::PARAM_STR);
309
310
            // Developer error, no regex or tag provided for this tool.
311
            if ($condTool === '') {
312
                throw new Exception("No regex or tag found for the tool $toolname. " .
313
                    "Please verify this entry in semi_automated.yml");
314
            }
315
316
            $queries[] .= "
317
                SELECT $toolname AS toolname, COUNT(rev_id) AS count
318
                FROM $revisionTable
319
                $pageJoin
320
                $tagJoin
321
                WHERE rev_user_text = :username
322
                AND $condTool
323
                $condNamespace
324
                $condBegin
325
                $condEnd";
326
        }
327
328
        // Combine to one big query.
329
        return implode(' UNION ', $queries);
330
    }
331
332
    /**
333
     * Get some of the inner SQL for self::getAutomatedCountsSql().
334
     * @param  string $tagTable Name of the `change_tag` table.
335
     * @param  string[] $values Values as defined in semi_automated.yml
336
     * @return string[] [Equality clause, JOIN clause]
337
     */
338
    private function getInnerAutomatedCountsSql($tagTable, $values)
339
    {
340
        $conn = $this->getProjectsConnection();
341
        $tagJoin = '';
342
        $condTool = '';
343
344
        if (isset($values['regex'])) {
345
            $regex = $conn->quote($values['regex'], \PDO::PARAM_STR);
346
            $condTool = "rev_comment REGEXP $regex";
347
        }
348
        if (isset($values['tag'])) {
349
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
350
            $tag = $conn->quote($values['tag'], \PDO::PARAM_STR);
351
352
            // Append to regex clause if already present.
353
            // Tags are more reliable but may not be present for edits made with
354
            //   older versions of the tool, before it started adding tags.
355
            if ($condTool === '') {
356
                $condTool = "ct_tag = $tag";
357
            } else {
358
                $condTool = '(' . $condTool . " OR ct_tag = $tag)";
359
            }
360
        }
361
362
        return [$condTool, $tagJoin];
363
    }
364
365
    /**
366
     * Get the combined regex and tags for all semi-automated tools,
367
     * or the given tool, ready to be used in a query.
368
     * @param Project $project
369
     * @param string|null $tool
370
     * @return string[] In the format:
371
     *    ['combined|regex', 'combined,tags']
372
     */
373
    private function getToolRegexAndTags(Project $project, $tool = null)
374
    {
375
        $conn = $this->getProjectsConnection();
376
        $automatedEditsHelper = $this->container->get('app.automated_edits_helper');
377
        $tools = $automatedEditsHelper->getTools($project);
378
        $regexes = [];
379
        $tags = [];
380
381
        if ($tool != '') {
382
            $tools = [$tools[$tool]];
383
        }
384
385
        foreach ($tools as $tool => $values) {
386
            if (isset($values['regex'])) {
387
                $regexes[] = $values['regex'];
388
            }
389
            if (isset($values['tag'])) {
390
                $tags[] = $conn->quote($values['tag'], \PDO::PARAM_STR);
391
            }
392
        }
393
394
        return [
395
            $conn->quote(implode('|', $regexes), \PDO::PARAM_STR),
396
            implode(',', $tags),
397
        ];
398
    }
399
}
400