Passed
Push — main ( fdfd45...8951d1 )
by MusikAnimal
03:10
created

TopEditsRepository::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 25
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 12
nc 1
nop 11
dl 0
loc 25
rs 9.8666
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Edit;
8
use App\Model\Page;
9
use App\Model\Project;
10
use App\Model\User;
11
use Doctrine\Persistence\ManagerRegistry;
12
use GuzzleHttp\Client;
13
use PDO;
14
use Psr\Cache\CacheItemPoolInterface;
15
use Psr\Log\LoggerInterface;
16
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
17
use Symfony\Component\HttpFoundation\RequestStack;
18
use Symfony\Component\HttpFoundation\Session\SessionInterface;
19
use Wikimedia\IPUtils;
20
21
/**
22
 * TopEditsRepository is responsible for retrieving data from the database
23
 * about the top-edited pages of a user. It doesn't do any post-processing
24
 * of that information.
25
 * @codeCoverageIgnore
26
 */
27
class TopEditsRepository extends UserRepository
28
{
29
    protected EditRepository $editRepo;
30
    protected UserRepository $userRepo;
31
32
    /**
33
     * @param ManagerRegistry $managerRegistry
34
     * @param CacheItemPoolInterface $cache
35
     * @param Client $guzzle
36
     * @param LoggerInterface $logger
37
     * @param ParameterBagInterface $parameterBag
38
     * @param bool $isWMF
39
     * @param int $queryTimeout
40
     * @param ProjectRepository $projectRepo
41
     * @param EditRepository $editRepo
42
     * @param UserRepository $userRepo
43
     * @param SessionInterface $session
44
     */
45
    public function __construct(
46
        ManagerRegistry $managerRegistry,
47
        CacheItemPoolInterface $cache,
48
        Client $guzzle,
49
        LoggerInterface $logger,
50
        ParameterBagInterface $parameterBag,
51
        bool $isWMF,
52
        int $queryTimeout,
53
        ProjectRepository $projectRepo,
54
        EditRepository $editRepo,
55
        UserRepository $userRepo,
56
        RequestStack $requestStack
57
    ) {
58
        $this->editRepo = $editRepo;
59
        $this->userRepo = $userRepo;
60
        parent::__construct(
61
            $managerRegistry,
62
            $cache,
63
            $guzzle,
64
            $logger,
65
            $parameterBag,
66
            $isWMF,
67
            $queryTimeout,
68
            $projectRepo,
69
            $requestStack
70
        );
71
    }
72
73
    /**
74
     * Factory to instantiate a new Edit for the given revision.
75
     * @param Page $page
76
     * @param array $revision
77
     * @return Edit
78
     */
79
    public function getEdit(Page $page, array $revision): Edit
80
    {
81
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
82
    }
83
84
    /**
85
     * Get the top edits by a user in a single namespace.
86
     * @param Project $project
87
     * @param User $user
88
     * @param int $namespace Namespace ID.
89
     * @param int|false $start Start date as Unix timestamp.
90
     * @param int|false $end End date as Unix timestamp.
91
     * @param int $limit Number of edits to fetch.
92
     * @param int $pagination Which page of results to return.
93
     * @return string[] namespace, page_title, redirect, count (number of edits), assessment (page assessment).
94
     */
95
    public function getTopEditsNamespace(
96
        Project $project,
97
        User $user,
98
        int $namespace = 0,
99
        $start = false,
100
        $end = false,
101
        int $limit = 1000,
102
        int $pagination = 0
103
    ): array {
104
        // Set up cache.
105
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_ns');
106
        if ($this->cache->hasItem($cacheKey)) {
107
            return $this->cache->getItem($cacheKey)->get();
108
        }
109
110
        $revDateConditions = $this->getDateConditions($start, $end);
111
        $pageTable = $project->getTableName('page');
112
        $revisionTable = $project->getTableName('revision');
113
114
        $hasPageAssessments = $this->isWMF && $project->hasPageAssessments($namespace);
115
        $paTable = $project->getTableName('page_assessments');
116
        $paSelect = $hasPageAssessments
117
            ?  ", (
118
                    SELECT pa_class
119
                    FROM $paTable
120
                    WHERE pa_page_id = page_id
121
                    AND pa_class != 'Unknown'
122
                    LIMIT 1
123
                ) AS pa_class"
124
            : '';
125
126
        $ipcJoin = '';
127
        $whereClause = 'rev_actor = :actorId';
128
        $params = [];
129
        if ($user->isIpRange()) {
130
            $ipcTable = $project->getTableName('ip_changes');
131
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
132
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
133
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
134
        }
135
136
        $offset = $pagination * $limit;
137
        $sql = "SELECT page_namespace AS `namespace`, page_title,
138
                    page_is_redirect AS `redirect`, COUNT(page_title) AS `count`
139
                    $paSelect
140
                FROM $pageTable
141
                JOIN $revisionTable ON page_id = rev_page
142
                $ipcJoin
143
                WHERE $whereClause
144
                AND page_namespace = :namespace
145
                $revDateConditions
146
                GROUP BY page_namespace, page_title
147
                ORDER BY count DESC
148
                LIMIT $limit
149
                OFFSET $offset";
150
151
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
152
        $result = $resultQuery->fetchAllAssociative();
153
154
        // Cache and return.
155
        return $this->setCache($cacheKey, $result);
156
    }
157
158
    /**
159
     * Count the number of edits in the given namespace.
160
     * @param Project $project
161
     * @param User $user
162
     * @param int|string $namespace
163
     * @param int|false $start Start date as Unix timestamp.
164
     * @param int|false $end End date as Unix timestamp.
165
     * @return mixed
166
     */
167
    public function countEditsNamespace(Project $project, User $user, $namespace, $start = false, $end = false)
168
    {
169
        // Set up cache.
170
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_count_ns');
171
        if ($this->cache->hasItem($cacheKey)) {
172
            return $this->cache->getItem($cacheKey)->get();
173
        }
174
175
        $revDateConditions = $this->getDateConditions($start, $end);
176
        $pageTable = $project->getTableName('page');
177
        $revisionTable = $project->getTableName('revision');
178
179
        $ipcJoin = '';
180
        $whereClause = 'rev_actor = :actorId';
181
        $params = [];
182
        if ($user->isIpRange()) {
183
            $ipcTable = $project->getTableName('ip_changes');
184
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
185
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
186
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
187
        }
188
189
        $sql = "SELECT COUNT(DISTINCT page_id)
190
                FROM $pageTable
191
                JOIN $revisionTable ON page_id = rev_page
192
                $ipcJoin
193
                WHERE $whereClause
194
                AND page_namespace = :namespace
195
                $revDateConditions";
196
197
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
198
199
        // Cache and return.
200
        return $this->setCache($cacheKey, $resultQuery->fetchOne());
201
    }
202
203
    /**
204
     * Get the top edits by a user across all namespaces.
205
     * @param Project $project
206
     * @param User $user
207
     * @param int|false $start Start date as Unix timestamp.
208
     * @param int|false $end End date as Unix timestamp.
209
     * @param int $limit Number of edits to fetch.
210
     * @return string[] namespace, page_title, redirect, count (number of edits), assessment (page assessment).
211
     */
212
    public function getTopEditsAllNamespaces(
213
        Project $project,
214
        User $user,
215
        $start = false,
216
        $end = false,
217
        int $limit = 10
218
    ): array {
219
        // Set up cache.
220
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_all');
221
        if ($this->cache->hasItem($cacheKey)) {
222
            return $this->cache->getItem($cacheKey)->get();
223
        }
224
225
        $revDateConditions = $this->getDateConditions($start, $end);
226
        $pageTable = $this->getTableName($project->getDatabaseName(), 'page');
227
        $revisionTable = $this->getTableName($project->getDatabaseName(), 'revision');
228
        $hasPageAssessments = $this->isWMF && $project->hasPageAssessments();
229
        $pageAssessmentsTable = $this->getTableName($project->getDatabaseName(), 'page_assessments');
230
        $paSelect = $hasPageAssessments
231
            ?  ", (
232
                    SELECT pa_class
233
                    FROM $pageAssessmentsTable
234
                    WHERE pa_page_id = e.page_id
235
                    LIMIT 1
236
                ) AS pa_class"
237
            : '';
238
239
        $ipcJoin = '';
240
        $whereClause = 'rev_actor = :actorId';
241
        $params = [];
242
        if ($user->isIpRange()) {
243
            $ipcTable = $project->getTableName('ip_changes');
244
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
245
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
246
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
247
        }
248
249
        $sql = "SELECT c.page_namespace AS `namespace`, e.page_title,
250
                    c.page_is_redirect AS `redirect`, c.count $paSelect
251
                FROM
252
                (
253
                    SELECT b.page_namespace, b.page_is_redirect, b.rev_page, b.count
254
                        ,@rn := if(@ns = b.page_namespace, @rn + 1, 1) AS row_number
255
                        ,@ns := b.page_namespace AS dummy
256
                    FROM
257
                    (
258
                        SELECT page_namespace, page_is_redirect, rev_page, count(rev_page) AS count
259
                        FROM $revisionTable
260
                        $ipcJoin
261
                        JOIN $pageTable ON page_id = rev_page
262
                        WHERE $whereClause
263
                        $revDateConditions
264
                        GROUP BY page_namespace, rev_page
265
                    ) AS b
266
                    JOIN (SELECT @ns := NULL, @rn := 0) AS vars
267
                    ORDER BY b.page_namespace ASC, b.count DESC
268
                ) AS c
269
                JOIN $pageTable e ON e.page_id = c.rev_page
270
                WHERE c.row_number <= $limit";
271
        $resultQuery = $this->executeQuery($sql, $project, $user, 'all', $params);
272
        $result = $resultQuery->fetchAllAssociative();
273
274
        // Cache and return.
275
        return $this->setCache($cacheKey, $result);
276
    }
277
278
    /**
279
     * Get the top edits by a user to a single page.
280
     * @param Page $page
281
     * @param User $user
282
     * @param int|false $start Start date as Unix timestamp.
283
     * @param int|false $end End date as Unix timestamp.
284
     * @return string[][] Each row with keys 'id', 'timestamp', 'minor', 'length',
285
     *   'length_change', 'reverted', 'user_id', 'username', 'comment', 'parent_comment'
286
     */
287
    public function getTopEditsPage(Page $page, User $user, $start = false, $end = false): array
288
    {
289
        // Set up cache.
290
        $cacheKey = $this->getCacheKey(func_get_args(), 'topedits_page');
291
        if ($this->cache->hasItem($cacheKey)) {
292
            return $this->cache->getItem($cacheKey)->get();
293
        }
294
295
        $results = $this->queryTopEditsPage($page, $user, $start, $end, true);
296
297
        // Now we need to get the most recent revision, since the childrevs stuff excludes it.
298
        $lastRev = $this->queryTopEditsPage($page, $user, $start, $end, false);
299
        if (empty($results) || $lastRev[0]['id'] !== $results[0]['id']) {
300
            $results = array_merge($lastRev, $results);
301
        }
302
303
        // Cache and return.
304
        return $this->setCache($cacheKey, $results);
305
    }
306
307
    /**
308
     * The actual query to get the top edits by the user to the page.
309
     * Because of the way the main query works, we aren't given the most recent revision,
310
     * so we have to call this twice, once with $childRevs set to true and once with false.
311
     * @param Page $page
312
     * @param User $user
313
     * @param int|false $start Start date as Unix timestamp.
314
     * @param int|false $end End date as Unix timestamp.
315
     * @param boolean $childRevs Whether to include child revisions.
316
     * @return array Each row with keys 'id', 'timestamp', 'minor', 'length',
317
     *   'length_change', 'reverted', 'user_id', 'username', 'comment', 'parent_comment'
318
     */
319
    private function queryTopEditsPage(
320
        Page $page,
321
        User $user,
322
        $start = false,
323
        $end = false,
324
        bool $childRevs = false
325
    ): array {
326
        $project = $page->getProject();
327
        $revDateConditions = $this->getDateConditions($start, $end, false, 'revs.');
328
        $revTable = $project->getTableName('revision');
329
        $commentTable = $project->getTableName('comment');
330
        $tagTable = $project->getTableName('change_tag');
331
        $tagDefTable = $project->getTableName('change_tag_def');
332
333
        if ($childRevs) {
334
            $childSelect = ", (
335
                    CASE WHEN
336
                        childrevs.rev_sha1 = parentrevs.rev_sha1
337
                        OR (
338
                            SELECT 1
339
                            FROM $tagTable
340
                            WHERE ct_rev_id = revs.rev_id
341
                            AND ct_tag_id = (
342
                                SELECT ctd_id
343
                                FROM $tagDefTable
344
                                WHERE ctd_name = 'mw-reverted'
345
                            )
346
                        )
347
                    THEN 1
348
                    ELSE 0
349
                    END
350
                ) AS `reverted`,
351
                childcomments.comment_text AS `parent_comment`";
352
            $childJoin = "LEFT JOIN $revTable AS childrevs ON (revs.rev_id = childrevs.rev_parent_id)
353
                LEFT OUTER JOIN $commentTable AS childcomments
354
                ON (childrevs.rev_comment_id = childcomments.comment_id)";
355
            $childWhere = 'AND childrevs.rev_page = :pageid';
356
            $childLimit = '';
357
        } else {
358
            $childSelect = ', "" AS parent_comment, 0 AS reverted';
359
            $childJoin = '';
360
            $childWhere = '';
361
            $childLimit = 'LIMIT 1';
362
        }
363
364
        $userId = $this->getProjectsConnection($project)->quote($user->getId($page->getProject()), PDO::PARAM_STR);
365
        $username = $this->getProjectsConnection($project)->quote($user->getUsername(), PDO::PARAM_STR);
366
367
        // IP range handling.
368
        $ipcJoin = '';
369
        $whereClause = 'revs.rev_actor = :actorId';
370
        $params = ['pageid' => $page->getId()];
371
        if ($user->isIpRange()) {
372
            $ipcTable = $project->getTableName('ip_changes');
373
            $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
374
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
375
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
376
        }
377
378
        $sql = "SELECT * FROM (
379
                    SELECT
380
                        revs.rev_id AS id,
381
                        revs.rev_timestamp AS timestamp,
382
                        revs.rev_minor_edit AS minor,
383
                        revs.rev_len AS length,
384
                        (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
385
                        $userId AS user_id,
386
                        $username AS username,
387
                        comments.comment_text AS `comment`
388
                        $childSelect
389
                    FROM $revTable AS revs
390
                    LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
391
                    $ipcJoin
392
                    LEFT OUTER JOIN $commentTable AS comments ON (revs.rev_comment_id = comments.comment_id)
393
                    $childJoin
394
                    WHERE $whereClause
395
                    $revDateConditions
396
                    AND revs.rev_page = :pageid
397
                    $childWhere
398
                ) a
399
                ORDER BY timestamp DESC
400
                $childLimit";
401
402
        $resultQuery = $this->executeQuery($sql, $project, $user, null, $params);
403
        return $resultQuery->fetchAllAssociative();
404
    }
405
}
406