Passed
Pull Request — main (#442)
by MusikAnimal
08:40 queued 04:21
created

TopEditsRepository::getContainer()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Edit;
8
use App\Model\Page;
9
use App\Model\Project;
10
use App\Model\User;
11
use GuzzleHttp\Client;
12
use PDO;
13
use Psr\Cache\CacheItemPoolInterface;
14
use Psr\Log\LoggerInterface;
15
use Symfony\Component\DependencyInjection\ContainerInterface;
16
use Wikimedia\IPUtils;
17
18
/**
19
 * TopEditsRepository is responsible for retrieving data from the database
20
 * about the top-edited pages of a user. It doesn't do any post-processing
21
 * of that information.
22
 * @codeCoverageIgnore
23
 */
24
class TopEditsRepository extends UserRepository
25
{
26
    protected EditRepository $editRepo;
27
    protected UserRepository $userRepo;
28
29
    public function __construct(
30
        ContainerInterface $container,
31
        CacheItemPoolInterface $cache,
32
        Client $guzzle,
33
        LoggerInterface $logger,
34
        bool $isWMF,
35
        int $queryTimeout,
36
        EditRepository $editRepo,
37
        UserRepository $userRepo,
38
        ProjectRepository $projectRepo
39
    ) {
40
        $this->editRepo = $editRepo;
41
        $this->userRepo = $userRepo;
42
        parent::__construct($container, $cache, $guzzle, $logger, $isWMF, $queryTimeout, $projectRepo);
43
    }
44
45
    /**
46
     * Factory to instantiate a new Edit for the given revision.
47
     * @param Page $page
48
     * @param array $revision
49
     * @return Edit
50
     */
51
    public function getEdit(Page $page, array $revision): Edit
52
    {
53
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
54
    }
55
56
    /**
57
     * Get the top edits by a user in a single namespace.
58
     * @param Project $project
59
     * @param User $user
60
     * @param int $namespace Namespace ID.
61
     * @param int|false $start Start date as Unix timestamp.
62
     * @param int|false $end End date as Unix timestamp.
63
     * @param int $limit Number of edits to fetch.
64
     * @param int $pagination Which page of results to return.
65
     * @return string[] page_namespace, page_title, page_is_redirect,
66
     *   count (number of edits), assessment (page assessment).
67
     */
68
    public function getTopEditsNamespace(
69
        Project $project,
70
        User $user,
71
        int $namespace = 0,
72
        $start = false,
73
        $end = false,
74
        int $limit = 1000,
75
        int $pagination = 0
76
    ): array {
77
        // Set up cache.
78
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_ns');
79
        if ($this->cache->hasItem($cacheKey)) {
80
            return $this->cache->getItem($cacheKey)->get();
81
        }
82
83
        $revDateConditions = $this->getDateConditions($start, $end);
84
        $pageTable = $project->getTableName('page');
85
        $revisionTable = $project->getTableName('revision');
86
87
        $hasPageAssessments = $this->isWMF && $project->hasPageAssessments() && 0 === $namespace;
88
        $paTable = $project->getTableName('page_assessments');
89
        $paSelect = $hasPageAssessments
90
            ?  ", (
91
                    SELECT pa_class
92
                    FROM $paTable
93
                    WHERE pa_page_id = page_id
94
                    AND pa_class != 'Unknown'
95
                    LIMIT 1
96
                ) AS pa_class"
97
            : '';
98
99
        $ipcJoin = '';
100
        $whereClause = 'rev_actor = :actorId';
101
        $params = [];
102
        if ($user->isIpRange()) {
103
            $ipcTable = $project->getTableName('ip_changes');
104
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
105
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
106
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
107
        }
108
109
        $offset = $pagination * $limit;
110
        $sql = "SELECT page_namespace, page_title, page_is_redirect, COUNT(page_title) AS count
111
                    $paSelect
112
                FROM $pageTable
113
                JOIN $revisionTable ON page_id = rev_page
114
                $ipcJoin
115
                WHERE $whereClause
116
                AND page_namespace = :namespace
117
                $revDateConditions
118
                GROUP BY page_namespace, page_title
119
                ORDER BY count DESC
120
                LIMIT $limit
121
                OFFSET $offset";
122
123
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
124
        $result = $resultQuery->fetchAllAssociative();
125
126
        // Cache and return.
127
        return $this->setCache($cacheKey, $result);
128
    }
129
130
    /**
131
     * Count the number of edits in the given namespace.
132
     * @param Project $project
133
     * @param User $user
134
     * @param int|string $namespace
135
     * @param int|false $start Start date as Unix timestamp.
136
     * @param int|false $end End date as Unix timestamp.
137
     * @return mixed
138
     */
139
    public function countEditsNamespace(Project $project, User $user, $namespace, $start = false, $end = false)
140
    {
141
        // Set up cache.
142
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_count_ns');
143
        if ($this->cache->hasItem($cacheKey)) {
144
            return $this->cache->getItem($cacheKey)->get();
145
        }
146
147
        $revDateConditions = $this->getDateConditions($start, $end);
148
        $pageTable = $project->getTableName('page');
149
        $revisionTable = $project->getTableName('revision');
150
151
        $ipcJoin = '';
152
        $whereClause = 'rev_actor = :actorId';
153
        $params = [];
154
        if ($user->isIpRange()) {
155
            $ipcTable = $project->getTableName('ip_changes');
156
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
157
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
158
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
159
        }
160
161
        $sql = "SELECT COUNT(DISTINCT page_id)
162
                FROM $pageTable
163
                JOIN $revisionTable ON page_id = rev_page
164
                $ipcJoin
165
                WHERE $whereClause
166
                AND page_namespace = :namespace
167
                $revDateConditions";
168
169
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
170
171
        // Cache and return.
172
        return $this->setCache($cacheKey, $resultQuery->fetchOne());
173
    }
174
175
    /**
176
     * Get the top edits by a user across all namespaces.
177
     * @param Project $project
178
     * @param User $user
179
     * @param int|false $start Start date as Unix timestamp.
180
     * @param int|false $end End date as Unix timestamp.
181
     * @param int $limit Number of edits to fetch.
182
     * @return string[] page_namespace, page_title, page_is_redirect,
183
     *   count (number of edits), assessment (page assessment).
184
     */
185
    public function getTopEditsAllNamespaces(
186
        Project $project,
187
        User $user,
188
        $start = false,
189
        $end = false,
190
        int $limit = 10
191
    ): array {
192
        // Set up cache.
193
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_all');
194
        if ($this->cache->hasItem($cacheKey)) {
195
            return $this->cache->getItem($cacheKey)->get();
196
        }
197
198
        $revDateConditions = $this->getDateConditions($start, $end);
199
        $pageTable = $this->getTableName($project->getDatabaseName(), 'page');
200
        $revisionTable = $this->getTableName($project->getDatabaseName(), 'revision');
201
        $hasPageAssessments = $this->isWMF && $project->hasPageAssessments();
202
        $pageAssessmentsTable = $this->getTableName($project->getDatabaseName(), 'page_assessments');
203
        $paSelect = $hasPageAssessments
204
            ?  ", (
205
                    SELECT pa_class
206
                    FROM $pageAssessmentsTable
207
                    WHERE pa_page_id = e.page_id
208
                    LIMIT 1
209
                ) AS pa_class"
210
            : ', NULL as pa_class';
211
212
        $ipcJoin = '';
213
        $whereClause = 'rev_actor = :actorId';
214
        $params = [];
215
        if ($user->isIpRange()) {
216
            $ipcTable = $project->getTableName('ip_changes');
217
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
218
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
219
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
220
        }
221
222
        $sql = "SELECT c.page_namespace, e.page_title, c.page_is_redirect, c.count $paSelect
223
                FROM
224
                (
225
                    SELECT b.page_namespace, b.page_is_redirect, b.rev_page, b.count
226
                        ,@rn := if(@ns = b.page_namespace, @rn + 1, 1) AS row_number
227
                        ,@ns := b.page_namespace AS dummy
228
                    FROM
229
                    (
230
                        SELECT page_namespace, page_is_redirect, rev_page, count(rev_page) AS count
231
                        FROM $revisionTable
232
                        $ipcJoin
233
                        JOIN $pageTable ON page_id = rev_page
234
                        WHERE $whereClause
235
                        $revDateConditions
236
                        GROUP BY page_namespace, rev_page
237
                    ) AS b
238
                    JOIN (SELECT @ns := NULL, @rn := 0) AS vars
239
                    ORDER BY b.page_namespace ASC, b.count DESC
240
                ) AS c
241
                JOIN $pageTable e ON e.page_id = c.rev_page
242
                WHERE c.row_number <= $limit";
243
        $resultQuery = $this->executeQuery($sql, $project, $user, 'all', $params);
244
        $result = $resultQuery->fetchAllAssociative();
245
246
        // Cache and return.
247
        return $this->setCache($cacheKey, $result);
248
    }
249
250
    /**
251
     * Get the top edits by a user to a single page.
252
     * @param Page $page
253
     * @param User $user
254
     * @param int|false $start Start date as Unix timestamp.
255
     * @param int|false $end End date as Unix timestamp.
256
     * @return string[][] Each row with keys 'id', 'timestamp', 'minor', 'length',
257
     *   'length_change', 'reverted', 'user_id', 'username', 'comment', 'parent_comment'
258
     */
259
    public function getTopEditsPage(Page $page, User $user, $start = false, $end = false): array
260
    {
261
        // Set up cache.
262
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_page');
263
        if ($this->cache->hasItem($cacheKey)) {
264
            return $this->cache->getItem($cacheKey)->get();
265
        }
266
267
        $results = $this->queryTopEditsPage($page, $user, $start, $end, true);
268
269
        // Now we need to get the most recent revision, since the childrevs stuff excludes it.
270
        $lastRev = $this->queryTopEditsPage($page, $user, $start, $end, false);
271
        if (empty($results) || $lastRev[0]['id'] !== $results[0]['id']) {
272
            $results = array_merge($lastRev, $results);
273
        }
274
275
        // Cache and return.
276
        return $this->setCache($cacheKey, $results);
277
    }
278
279
    /**
280
     * The actual query to get the top edits by the user to the page.
281
     * Because of the way the main query works, we aren't given the most recent revision,
282
     * so we have to call this twice, once with $childRevs set to true and once with false.
283
     * @param Page $page
284
     * @param User $user
285
     * @param int|false $start Start date as Unix timestamp.
286
     * @param int|false $end End date as Unix timestamp.
287
     * @param boolean $childRevs Whether to include child revisions.
288
     * @return array Each row with keys 'id', 'timestamp', 'minor', 'length',
289
     *   'length_change', 'reverted', 'user_id', 'username', 'comment', 'parent_comment'
290
     */
291
    private function queryTopEditsPage(
292
        Page $page,
293
        User $user,
294
        $start = false,
295
        $end = false,
296
        bool $childRevs = false
297
    ): array {
298
        $project = $page->getProject();
299
        $revDateConditions = $this->getDateConditions($start, $end, false, 'revs.');
300
        $revTable = $project->getTableName('revision');
301
        $commentTable = $project->getTableName('comment');
302
        $tagTable = $project->getTableName('change_tag');
303
        $tagDefTable = $project->getTableName('change_tag_def');
304
305
        if ($childRevs) {
306
            $childSelect = ", (
307
                    CASE WHEN
308
                        childrevs.rev_sha1 = parentrevs.rev_sha1
309
                        OR (
310
                            SELECT 1
311
                            FROM $tagTable
312
                            WHERE ct_rev_id = revs.rev_id
313
                            AND ct_tag_id = (
314
                                SELECT ctd_id
315
                                FROM $tagDefTable
316
                                WHERE ctd_name = 'mw-reverted'
317
                            )
318
                        )
319
                    THEN 1
320
                    ELSE 0
321
                    END
322
                ) AS `reverted`,
323
                childcomments.comment_text AS `parent_comment`";
324
            $childJoin = "LEFT JOIN $revTable AS childrevs ON (revs.rev_id = childrevs.rev_parent_id)
325
                LEFT OUTER JOIN $commentTable AS childcomments
326
                ON (childrevs.rev_comment_id = childcomments.comment_id)";
327
            $childWhere = 'AND childrevs.rev_page = :pageid';
328
            $childLimit = '';
329
        } else {
330
            $childSelect = ', "" AS parent_comment, 0 AS reverted';
331
            $childJoin = '';
332
            $childWhere = '';
333
            $childLimit = 'LIMIT 1';
334
        }
335
336
        $userId = $this->getProjectsConnection($project)->quote($user->getId($page->getProject()), PDO::PARAM_STR);
337
        $username = $this->getProjectsConnection($project)->quote($user->getUsername(), PDO::PARAM_STR);
338
339
        // IP range handling.
340
        $ipcJoin = '';
341
        $whereClause = 'revs.rev_actor = :actorId';
342
        $params = ['pageid' => $page->getId()];
343
        if ($user->isIpRange()) {
344
            $ipcTable = $project->getTableName('ip_changes');
345
            $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
346
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
347
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
348
        }
349
350
        $sql = "SELECT * FROM (
351
                    SELECT
352
                        revs.rev_id AS id,
353
                        revs.rev_timestamp AS timestamp,
354
                        revs.rev_minor_edit AS minor,
355
                        revs.rev_len AS length,
356
                        (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
357
                        $userId AS user_id,
358
                        $username AS username,
359
                        comments.comment_text AS `comment`
360
                        $childSelect
361
                    FROM $revTable AS revs
362
                    LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
363
                    $ipcJoin
364
                    LEFT OUTER JOIN $commentTable AS comments ON (revs.rev_comment_id = comments.comment_id)
365
                    $childJoin
366
                    WHERE $whereClause
367
                    $revDateConditions
368
                    AND revs.rev_page = :pageid
369
                    $childWhere
370
                ) a
371
                ORDER BY timestamp DESC
372
                $childLimit";
373
374
        $resultQuery = $this->executeQuery($sql, $project, $user, null, $params);
375
        return $resultQuery->fetchAllAssociative();
376
    }
377
}
378