Passed
Push — main ( fdfd45...8951d1 )
by MusikAnimal
03:10
created

TopEditsRepository::getEdit()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Edit;
8
use App\Model\Page;
9
use App\Model\Project;
10
use App\Model\User;
11
use Doctrine\Persistence\ManagerRegistry;
12
use GuzzleHttp\Client;
13
use PDO;
14
use Psr\Cache\CacheItemPoolInterface;
15
use Psr\Log\LoggerInterface;
16
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
17
use Symfony\Component\HttpFoundation\RequestStack;
18
use Symfony\Component\HttpFoundation\Session\SessionInterface;
19
use Wikimedia\IPUtils;
20
21
/**
22
 * TopEditsRepository is responsible for retrieving data from the database
23
 * about the top-edited pages of a user. It doesn't do any post-processing
24
 * of that information.
25
 * @codeCoverageIgnore
26
 */
27
class TopEditsRepository extends UserRepository
28
{
29
    protected EditRepository $editRepo;
30
    protected UserRepository $userRepo;
31
32
    /**
33
     * @param ManagerRegistry $managerRegistry
34
     * @param CacheItemPoolInterface $cache
35
     * @param Client $guzzle
36
     * @param LoggerInterface $logger
37
     * @param ParameterBagInterface $parameterBag
38
     * @param bool $isWMF
39
     * @param int $queryTimeout
40
     * @param ProjectRepository $projectRepo
41
     * @param EditRepository $editRepo
42
     * @param UserRepository $userRepo
43
     * @param SessionInterface $session
44
     */
45
    public function __construct(
46
        ManagerRegistry $managerRegistry,
47
        CacheItemPoolInterface $cache,
48
        Client $guzzle,
49
        LoggerInterface $logger,
50
        ParameterBagInterface $parameterBag,
51
        bool $isWMF,
52
        int $queryTimeout,
53
        ProjectRepository $projectRepo,
54
        EditRepository $editRepo,
55
        UserRepository $userRepo,
56
        RequestStack $requestStack
57
    ) {
58
        $this->editRepo = $editRepo;
59
        $this->userRepo = $userRepo;
60
        parent::__construct(
61
            $managerRegistry,
62
            $cache,
63
            $guzzle,
64
            $logger,
65
            $parameterBag,
66
            $isWMF,
67
            $queryTimeout,
68
            $projectRepo,
69
            $requestStack
70
        );
71
    }
72
73
    /**
74
     * Factory to instantiate a new Edit for the given revision.
75
     * @param Page $page
76
     * @param array $revision
77
     * @return Edit
78
     */
79
    public function getEdit(Page $page, array $revision): Edit
80
    {
81
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
82
    }
83
84
    /**
85
     * Get the top edits by a user in a single namespace.
86
     * @param Project $project
87
     * @param User $user
88
     * @param int $namespace Namespace ID.
89
     * @param int|false $start Start date as Unix timestamp.
90
     * @param int|false $end End date as Unix timestamp.
91
     * @param int $limit Number of edits to fetch.
92
     * @param int $pagination Which page of results to return.
93
     * @return string[] namespace, page_title, redirect, count (number of edits), assessment (page assessment).
94
     */
95
    public function getTopEditsNamespace(
96
        Project $project,
97
        User $user,
98
        int $namespace = 0,
99
        $start = false,
100
        $end = false,
101
        int $limit = 1000,
102
        int $pagination = 0
103
    ): array {
104
        // Set up cache.
105
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_ns');
106
        if ($this->cache->hasItem($cacheKey)) {
107
            return $this->cache->getItem($cacheKey)->get();
108
        }
109
110
        $revDateConditions = $this->getDateConditions($start, $end);
111
        $pageTable = $project->getTableName('page');
112
        $revisionTable = $project->getTableName('revision');
113
114
        $hasPageAssessments = $this->isWMF && $project->hasPageAssessments($namespace);
115
        $paTable = $project->getTableName('page_assessments');
116
        $paSelect = $hasPageAssessments
117
            ?  ", (
118
                    SELECT pa_class
119
                    FROM $paTable
120
                    WHERE pa_page_id = page_id
121
                    AND pa_class != 'Unknown'
122
                    LIMIT 1
123
                ) AS pa_class"
124
            : '';
125
126
        $ipcJoin = '';
127
        $whereClause = 'rev_actor = :actorId';
128
        $params = [];
129
        if ($user->isIpRange()) {
130
            $ipcTable = $project->getTableName('ip_changes');
131
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
132
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
133
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
134
        }
135
136
        $offset = $pagination * $limit;
137
        $sql = "SELECT page_namespace AS `namespace`, page_title,
138
                    page_is_redirect AS `redirect`, COUNT(page_title) AS `count`
139
                    $paSelect
140
                FROM $pageTable
141
                JOIN $revisionTable ON page_id = rev_page
142
                $ipcJoin
143
                WHERE $whereClause
144
                AND page_namespace = :namespace
145
                $revDateConditions
146
                GROUP BY page_namespace, page_title
147
                ORDER BY count DESC
148
                LIMIT $limit
149
                OFFSET $offset";
150
151
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
152
        $result = $resultQuery->fetchAllAssociative();
153
154
        // Cache and return.
155
        return $this->setCache($cacheKey, $result);
156
    }
157
158
    /**
159
     * Count the number of edits in the given namespace.
160
     * @param Project $project
161
     * @param User $user
162
     * @param int|string $namespace
163
     * @param int|false $start Start date as Unix timestamp.
164
     * @param int|false $end End date as Unix timestamp.
165
     * @return mixed
166
     */
167
    public function countEditsNamespace(Project $project, User $user, $namespace, $start = false, $end = false)
168
    {
169
        // Set up cache.
170
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_count_ns');
171
        if ($this->cache->hasItem($cacheKey)) {
172
            return $this->cache->getItem($cacheKey)->get();
173
        }
174
175
        $revDateConditions = $this->getDateConditions($start, $end);
176
        $pageTable = $project->getTableName('page');
177
        $revisionTable = $project->getTableName('revision');
178
179
        $ipcJoin = '';
180
        $whereClause = 'rev_actor = :actorId';
181
        $params = [];
182
        if ($user->isIpRange()) {
183
            $ipcTable = $project->getTableName('ip_changes');
184
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
185
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
186
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
187
        }
188
189
        $sql = "SELECT COUNT(DISTINCT page_id)
190
                FROM $pageTable
191
                JOIN $revisionTable ON page_id = rev_page
192
                $ipcJoin
193
                WHERE $whereClause
194
                AND page_namespace = :namespace
195
                $revDateConditions";
196
197
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
198
199
        // Cache and return.
200
        return $this->setCache($cacheKey, $resultQuery->fetchOne());
201
    }
202
203
    /**
204
     * Get the top edits by a user across all namespaces.
205
     * @param Project $project
206
     * @param User $user
207
     * @param int|false $start Start date as Unix timestamp.
208
     * @param int|false $end End date as Unix timestamp.
209
     * @param int $limit Number of edits to fetch.
210
     * @return string[] namespace, page_title, redirect, count (number of edits), assessment (page assessment).
211
     */
212
    public function getTopEditsAllNamespaces(
213
        Project $project,
214
        User $user,
215
        $start = false,
216
        $end = false,
217
        int $limit = 10
218
    ): array {
219
        // Set up cache.
220
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_all');
221
        if ($this->cache->hasItem($cacheKey)) {
222
            return $this->cache->getItem($cacheKey)->get();
223
        }
224
225
        $revDateConditions = $this->getDateConditions($start, $end);
226
        $pageTable = $this->getTableName($project->getDatabaseName(), 'page');
227
        $revisionTable = $this->getTableName($project->getDatabaseName(), 'revision');
228
        $hasPageAssessments = $this->isWMF && $project->hasPageAssessments();
229
        $pageAssessmentsTable = $this->getTableName($project->getDatabaseName(), 'page_assessments');
230
        $paSelect = $hasPageAssessments
231
            ?  ", (
232
                    SELECT pa_class
233
                    FROM $pageAssessmentsTable
234
                    WHERE pa_page_id = e.page_id
235
                    LIMIT 1
236
                ) AS pa_class"
237
            : '';
238
239
        $ipcJoin = '';
240
        $whereClause = 'rev_actor = :actorId';
241
        $params = [];
242
        if ($user->isIpRange()) {
243
            $ipcTable = $project->getTableName('ip_changes');
244
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
245
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
246
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
247
        }
248
249
        $sql = "SELECT c.page_namespace AS `namespace`, e.page_title,
250
                    c.page_is_redirect AS `redirect`, c.count $paSelect
251
                FROM
252
                (
253
                    SELECT b.page_namespace, b.page_is_redirect, b.rev_page, b.count
254
                        ,@rn := if(@ns = b.page_namespace, @rn + 1, 1) AS row_number
255
                        ,@ns := b.page_namespace AS dummy
256
                    FROM
257
                    (
258
                        SELECT page_namespace, page_is_redirect, rev_page, count(rev_page) AS count
259
                        FROM $revisionTable
260
                        $ipcJoin
261
                        JOIN $pageTable ON page_id = rev_page
262
                        WHERE $whereClause
263
                        $revDateConditions
264
                        GROUP BY page_namespace, rev_page
265
                    ) AS b
266
                    JOIN (SELECT @ns := NULL, @rn := 0) AS vars
267
                    ORDER BY b.page_namespace ASC, b.count DESC
268
                ) AS c
269
                JOIN $pageTable e ON e.page_id = c.rev_page
270
                WHERE c.row_number <= $limit";
271
        $resultQuery = $this->executeQuery($sql, $project, $user, 'all', $params);
272
        $result = $resultQuery->fetchAllAssociative();
273
274
        // Cache and return.
275
        return $this->setCache($cacheKey, $result);
276
    }
277
278
    /**
279
     * Get the top edits by a user to a single page.
280
     * @param Page $page
281
     * @param User $user
282
     * @param int|false $start Start date as Unix timestamp.
283
     * @param int|false $end End date as Unix timestamp.
284
     * @return string[][] Each row with keys 'id', 'timestamp', 'minor', 'length',
285
     *   'length_change', 'reverted', 'user_id', 'username', 'comment', 'parent_comment'
286
     */
287
    public function getTopEditsPage(Page $page, User $user, $start = false, $end = false): array
288
    {
289
        // Set up cache.
290
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_page');
291
        if ($this->cache->hasItem($cacheKey)) {
292
            return $this->cache->getItem($cacheKey)->get();
293
        }
294
295
        $results = $this->queryTopEditsPage($page, $user, $start, $end, true);
296
297
        // Now we need to get the most recent revision, since the childrevs stuff excludes it.
298
        $lastRev = $this->queryTopEditsPage($page, $user, $start, $end, false);
299
        if (empty($results) || $lastRev[0]['id'] !== $results[0]['id']) {
300
            $results = array_merge($lastRev, $results);
301
        }
302
303
        // Cache and return.
304
        return $this->setCache($cacheKey, $results);
305
    }
306
307
    /**
308
     * The actual query to get the top edits by the user to the page.
309
     * Because of the way the main query works, we aren't given the most recent revision,
310
     * so we have to call this twice, once with $childRevs set to true and once with false.
311
     * @param Page $page
312
     * @param User $user
313
     * @param int|false $start Start date as Unix timestamp.
314
     * @param int|false $end End date as Unix timestamp.
315
     * @param boolean $childRevs Whether to include child revisions.
316
     * @return array Each row with keys 'id', 'timestamp', 'minor', 'length',
317
     *   'length_change', 'reverted', 'user_id', 'username', 'comment', 'parent_comment'
318
     */
319
    private function queryTopEditsPage(
320
        Page $page,
321
        User $user,
322
        $start = false,
323
        $end = false,
324
        bool $childRevs = false
325
    ): array {
326
        $project = $page->getProject();
327
        $revDateConditions = $this->getDateConditions($start, $end, false, 'revs.');
328
        $revTable = $project->getTableName('revision');
329
        $commentTable = $project->getTableName('comment');
330
        $tagTable = $project->getTableName('change_tag');
331
        $tagDefTable = $project->getTableName('change_tag_def');
332
333
        if ($childRevs) {
334
            $childSelect = ", (
335
                    CASE WHEN
336
                        childrevs.rev_sha1 = parentrevs.rev_sha1
337
                        OR (
338
                            SELECT 1
339
                            FROM $tagTable
340
                            WHERE ct_rev_id = revs.rev_id
341
                            AND ct_tag_id = (
342
                                SELECT ctd_id
343
                                FROM $tagDefTable
344
                                WHERE ctd_name = 'mw-reverted'
345
                            )
346
                        )
347
                    THEN 1
348
                    ELSE 0
349
                    END
350
                ) AS `reverted`,
351
                childcomments.comment_text AS `parent_comment`";
352
            $childJoin = "LEFT JOIN $revTable AS childrevs ON (revs.rev_id = childrevs.rev_parent_id)
353
                LEFT OUTER JOIN $commentTable AS childcomments
354
                ON (childrevs.rev_comment_id = childcomments.comment_id)";
355
            $childWhere = 'AND childrevs.rev_page = :pageid';
356
            $childLimit = '';
357
        } else {
358
            $childSelect = ', "" AS parent_comment, 0 AS reverted';
359
            $childJoin = '';
360
            $childWhere = '';
361
            $childLimit = 'LIMIT 1';
362
        }
363
364
        $userId = $this->getProjectsConnection($project)->quote($user->getId($page->getProject()), PDO::PARAM_STR);
365
        $username = $this->getProjectsConnection($project)->quote($user->getUsername(), PDO::PARAM_STR);
366
367
        // IP range handling.
368
        $ipcJoin = '';
369
        $whereClause = 'revs.rev_actor = :actorId';
370
        $params = ['pageid' => $page->getId()];
371
        if ($user->isIpRange()) {
372
            $ipcTable = $project->getTableName('ip_changes');
373
            $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
374
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
375
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
376
        }
377
378
        $sql = "SELECT * FROM (
379
                    SELECT
380
                        revs.rev_id AS id,
381
                        revs.rev_timestamp AS timestamp,
382
                        revs.rev_minor_edit AS minor,
383
                        revs.rev_len AS length,
384
                        (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
385
                        $userId AS user_id,
386
                        $username AS username,
387
                        comments.comment_text AS `comment`
388
                        $childSelect
389
                    FROM $revTable AS revs
390
                    LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
391
                    $ipcJoin
392
                    LEFT OUTER JOIN $commentTable AS comments ON (revs.rev_comment_id = comments.comment_id)
393
                    $childJoin
394
                    WHERE $whereClause
395
                    $revDateConditions
396
                    AND revs.rev_page = :pageid
397
                    $childWhere
398
                ) a
399
                ORDER BY timestamp DESC
400
                $childLimit";
401
402
        $resultQuery = $this->executeQuery($sql, $project, $user, null, $params);
403
        return $resultQuery->fetchAllAssociative();
404
    }
405
}
406