Passed
Push — pageinfo ( 7c1380 )
by MusikAnimal
06:18
created

PageInfoRepository::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 27
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 13
nc 1
nop 12
dl 0
loc 27
rs 9.8333
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Helper\AutomatedEditsHelper;
8
use App\Model\Edit;
9
use App\Model\Page;
10
use Doctrine\DBAL\Driver\ResultStatement;
11
use Doctrine\Persistence\ManagerRegistry;
12
use GuzzleHttp\Client;
13
use PDO;
14
use Psr\Cache\CacheItemPoolInterface;
15
use Psr\Log\LoggerInterface;
16
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
17
use Symfony\Component\HttpFoundation\RequestStack;
18
19
/**
20
 * PageInfoRepository is responsible for retrieving data about a single page on a given wiki.
21
 * @codeCoverageIgnore
22
 */
23
class PageInfoRepository extends AutoEditsRepository
24
{
25
    protected EditRepository $editRepo;
26
    protected UserRepository $userRepo;
27
28
    /** @var int Maximum number of revisions to process, as configured via APP_MAX_PAGE_REVISIONS */
29
    protected int $maxPageRevisions;
30
31
    /**
32
     * @param ManagerRegistry $managerRegistry
33
     * @param CacheItemPoolInterface $cache
34
     * @param Client $guzzle
35
     * @param LoggerInterface $logger
36
     * @param ParameterBagInterface $parameterBag
37
     * @param bool $isWMF
38
     * @param int $queryTimeout
39
     * @param EditRepository $editRepo
40
     * @param UserRepository $userRepo
41
     */
42
    public function __construct(
43
        ManagerRegistry $managerRegistry,
44
        CacheItemPoolInterface $cache,
45
        Client $guzzle,
46
        LoggerInterface $logger,
47
        ParameterBagInterface $parameterBag,
48
        bool $isWMF,
49
        int $queryTimeout,
50
        EditRepository $editRepo,
51
        UserRepository $userRepo,
52
        ProjectRepository $projectRepo,
53
        AutomatedEditsHelper $autoEditsHelper,
54
        RequestStack $requestStack
55
    ) {
56
        $this->editRepo = $editRepo;
57
        $this->userRepo = $userRepo;
58
        parent::__construct(
59
            $managerRegistry,
60
            $cache,
61
            $guzzle,
62
            $logger,
63
            $parameterBag,
64
            $isWMF,
65
            $queryTimeout,
66
            $projectRepo,
67
            $autoEditsHelper,
68
            $requestStack
69
        );
70
    }
71
72
    /**
73
     * Get the performance maximum on the number of revisions to process.
74
     * @return int
75
     */
76
    public function getMaxPageRevisions(): int
77
    {
78
        if (!isset($this->maxPageRevisions)) {
79
            $this->maxPageRevisions = (int)$this->parameterBag->get('app.max_page_revisions');
80
        }
81
        return $this->maxPageRevisions;
82
    }
83
84
    /**
85
     * Factory to instantiate a new Edit for the given revision.
86
     * @param Page $page
87
     * @param array $revision
88
     * @return Edit
89
     */
90
    public function getEdit(Page $page, array $revision): Edit
91
    {
92
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
93
    }
94
95
    /**
96
     * Get the number of edits made to the page by bots or former bots.
97
     * @param Page $page
98
     * @param false|int $start
99
     * @param false|int $end
100
     * @param ?int $limit
101
     * @param bool $count Return a count rather than the full set of rows.
102
     * @return ResultStatement resolving with keys 'count', 'username' and 'current'.
103
     */
104
    public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement
105
    {
106
        $project = $page->getProject();
107
        $revTable = $project->getTableName('revision');
108
        $userGroupsTable = $project->getTableName('user_groups');
109
        $userFormerGroupsTable = $project->getTableName('user_former_groups');
110
        $actorTable = $project->getTableName('actor', 'revision');
111
112
        $datesConditions = $this->getDateConditions($start, $end);
113
114
        if ($count) {
115
            $actorSelect = '';
116
            $groupBy = '';
117
        } else {
118
            $actorSelect = 'actor_name AS username, ';
119
            $groupBy = 'GROUP BY actor_user';
120
        }
121
122
        $limitClause = '';
123
        if (null !== $limit) {
124
            $limitClause = "LIMIT $limit";
125
        }
126
127
        $sql = "SELECT COUNT(DISTINCT rev_id) AS `count`, $actorSelect '0' AS `current`
128
                FROM (
129
                    SELECT rev_id, rev_actor, rev_timestamp
130
                    FROM $revTable
131
                    WHERE rev_page = :pageId
132
                    ORDER BY rev_timestamp DESC
133
                    $limitClause
134
                ) a
135
                JOIN $actorTable ON actor_id = rev_actor
136
                LEFT JOIN $userFormerGroupsTable ON actor_user = ufg_user
137
                WHERE ufg_group = 'bot' $datesConditions
138
                $groupBy
139
                UNION
140
                SELECT COUNT(DISTINCT rev_id) AS count, $actorSelect '1' AS current
141
                FROM (
142
                    SELECT rev_id, rev_actor, rev_timestamp
143
                    FROM $revTable
144
                    WHERE rev_page = :pageId
145
                    ORDER BY rev_timestamp DESC
146
                    $limitClause
147
                ) a
148
                JOIN $actorTable ON actor_id = rev_actor
149
                LEFT JOIN $userGroupsTable ON actor_user = ug_user
150
                WHERE ug_group = 'bot' $datesConditions
151
                $groupBy";
152
153
        return $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()]);
154
    }
155
156
    /**
157
     * Get prior deletions, page moves, and protections to the page.
158
     * @param Page $page
159
     * @param false|int $start
160
     * @param false|int $end
161
     * @return string[] each entry with keys 'log_action', 'log_type' and 'timestamp'.
162
     */
163
    public function getLogEvents(Page $page, $start, $end): array
164
    {
165
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
166
        if ($this->cache->hasItem($cacheKey)) {
167
            return $this->cache->getItem($cacheKey)->get();
168
        }
169
        $loggingTable = $page->getProject()->getTableName('logging', 'logindex');
170
171
        $datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp');
172
173
        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
174
                FROM $loggingTable
175
                WHERE log_namespace = '" . $page->getNamespace() . "'
176
                AND log_title = :title AND log_timestamp > 1 $datesConditions
177
                AND log_type IN ('delete', 'move', 'protect', 'stable')";
178
        $title = str_replace(' ', '_', $page->getTitle());
179
180
        $result = $this->executeProjectsQuery($page->getProject(), $sql, ['title' => $title])
181
            ->fetchAllAssociative();
182
        return $this->setCache($cacheKey, $result);
183
    }
184
185
    /**
186
     * Get the number of categories, templates, and files that are on the page.
187
     * @param Page $page
188
     * @return array With keys 'categories', 'templates' and 'files'.
189
     */
190
    public function getTransclusionData(Page $page): array
191
    {
192
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
193
        if ($this->cache->hasItem($cacheKey)) {
194
            return $this->cache->getItem($cacheKey)->get();
195
        }
196
197
        $categorylinksTable = $page->getProject()->getTableName('categorylinks');
198
        $templatelinksTable = $page->getProject()->getTableName('templatelinks');
199
        $imagelinksTable = $page->getProject()->getTableName('imagelinks');
200
        $sql = "(
201
                    SELECT 'categories' AS `key`, COUNT(*) AS val
202
                    FROM $categorylinksTable
203
                    WHERE cl_from = :pageId
204
                ) UNION (
205
                    SELECT 'templates' AS `key`, COUNT(*) AS val
206
                    FROM $templatelinksTable
207
                    WHERE tl_from = :pageId
208
                ) UNION (
209
                    SELECT 'files' AS `key`, COUNT(*) AS val
210
                    FROM $imagelinksTable
211
                    WHERE il_from = :pageId
212
                )";
213
        $resultQuery = $this->executeProjectsQuery($page->getProject(), $sql, ['pageId' => $page->getId()]);
214
        $transclusionCounts = [];
215
216
        while ($result = $resultQuery->fetchAssociative()) {
217
            $transclusionCounts[$result['key']] = (int)$result['val'];
218
        }
219
220
        return $this->setCache($cacheKey, $transclusionCounts);
221
    }
222
223
    /**
224
     * Get the top editors to the page by edit count.
225
     * @param Page $page
226
     * @param false|int $start
227
     * @param false|int $end
228
     * @param int $limit
229
     * @param bool $noBots
230
     * @return array
231
     */
232
    public function getTopEditorsByEditCount(
233
        Page $page,
234
        $start = false,
235
        $end = false,
236
        int $limit = 20,
237
        bool $noBots = false
238
    ): array {
239
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
240
        if ($this->cache->hasItem($cacheKey)) {
241
            return $this->cache->getItem($cacheKey)->get();
242
        }
243
244
        $project = $page->getProject();
245
        // Faster to use revision instead of revision_userindex in this case.
246
        $revTable = $project->getTableName('revision', '');
247
        $actorTable = $project->getTableName('actor');
248
249
        $dateConditions = $this->getDateConditions($start, $end);
250
251
        $sql = "SELECT actor_name AS username,
252
                    COUNT(rev_id) AS count,
253
                    SUM(rev_minor_edit) AS minor,
254
                    MIN(rev_timestamp) AS first_timestamp,
255
                    MIN(rev_id) AS first_revid,
256
                    MAX(rev_timestamp) AS latest_timestamp,
257
                    MAX(rev_id) AS latest_revid
258
                FROM $revTable
259
                JOIN $actorTable ON rev_actor = actor_id
260
                WHERE rev_page = :pageId $dateConditions";
261
262
        if ($noBots) {
263
            $userGroupsTable = $project->getTableName('user_groups');
264
            $sql .= "AND NOT EXISTS (
265
                         SELECT 1
266
                         FROM $userGroupsTable
267
                         WHERE ug_user = actor_user
268
                         AND ug_group = 'bot'
269
                     )";
270
        }
271
272
        $sql .= "GROUP BY actor_id
273
                 ORDER BY count DESC
274
                 LIMIT $limit";
275
276
        $result = $this->executeProjectsQuery($project, $sql, [
277
            'pageId' => $page->getId(),
278
        ])->fetchAllAssociative();
279
280
        return $this->setCache($cacheKey, $result);
281
    }
282
283
    /**
284
     * Get various basic info used in the API, including the number of revisions, unique authors, initial author
285
     * and edit count of the initial author. This is combined into one query for better performance. Caching is only
286
     * applied if it took considerable time to process, because using the gadget, this will get hit for a different page
287
     * constantly, where the likelihood of cache benefiting us is slim.
288
     * @param Page $page The page.
289
     * @return string[]|false false if the page was not found.
290
     */
291
    public function getBasicEditingInfo(Page $page)
292
    {
293
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
294
        if ($this->cache->hasItem($cacheKey)) {
295
            return $this->cache->getItem($cacheKey)->get();
296
        }
297
298
        $project = $page->getProject();
299
        $revTable = $project->getTableName('revision');
300
        $userTable = $project->getTableName('user');
301
        $pageTable = $project->getTableName('page');
302
        $actorTable = $project->getTableName('actor');
303
304
        $sql = "SELECT *, (
305
                    SELECT user_editcount
306
                    FROM $userTable
307
                    WHERE user_id = author_user_id
308
                ) AS author_editcount
309
                FROM (
310
                    (
311
                        SELECT COUNT(rev_id) AS num_edits,
312
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
313
                            SUM(actor_user IS NULL) AS ip_edits,
314
                            SUM(rev_minor_edit) AS minor_edits
315
                        FROM $revTable
316
                        JOIN $actorTable ON actor_id = rev_actor
317
                        WHERE rev_page = :pageid
318
                        AND rev_timestamp > 0 # Use rev_timestamp index
319
                    ) a,
320
                    (
321
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
322
                        #   and the lowest rev_id may not be the first revision.
323
                        SELECT actor_name AS author,
324
                               actor_user AS author_user_id,
325
                               rev_timestamp AS created_at,
326
                               rev_id AS created_rev_id
327
                        FROM $revTable
328
                        JOIN $actorTable ON actor_id = rev_actor
329
                        WHERE rev_page = :pageid
330
                        ORDER BY rev_timestamp ASC
331
                        LIMIT 1
332
                    ) b,
333
                    (
334
                        SELECT rev_timestamp AS modified_at,
335
                               rev_id AS modified_rev_id
336
                        FROM $revTable
337
                        JOIN $pageTable ON page_id = rev_page
338
                        WHERE rev_page = :pageid
339
                        AND rev_id = page_latest
340
                    ) c
341
                )";
342
        $params = ['pageid' => $page->getId()];
343
344
        // Get current time so we can compare timestamps
345
        // and decide whether or to cache the result.
346
        $time1 = time();
347
348
        /**
349
         * This query can sometimes take too long to run for pages with tens of thousands
350
         * of revisions. This query is used by the PageInfo gadget, which shows basic
351
         * data in real-time, so if it takes too long than the user probably didn't even
352
         * wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery,
353
         * which will set the max_statement_time to 60 seconds.
354
         */
355
        $result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative();
356
357
        $time2 = time();
358
359
        // If it took over 5 seconds, cache the result for 20 minutes.
360
        if ($time2 - $time1 > 5) {
361
            $this->setCache($cacheKey, $result, 'PT20M');
362
        }
363
364
        return $result ?? false;
365
    }
366
367
    /**
368
     * Get counts of (semi-)automated tools that were used to edit the page.
369
     * @param Page $page
370
     * @param $start
371
     * @param $end
372
     * @return array
373
     */
374
    public function getAutoEditsCounts(Page $page, $start, $end): array
375
    {
376
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
377
        if ($this->cache->hasItem($cacheKey)) {
378
            return $this->cache->getItem($cacheKey)->get();
379
        }
380
381
        $project = $page->getProject();
382
        $tools = $this->getTools($project);
383
        $queries = [];
384
        $revisionTable = $project->getTableName('revision', '');
385
        $pageTable = $project->getTableName('page');
386
        $pageJoin = "LEFT JOIN $pageTable ON rev_page = page_id";
387
        $revDateConditions = $this->getDateConditions($start, $end);
388
        $conn = $this->getProjectsConnection($project);
389
390
        foreach ($tools as $toolName => $values) {
391
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
392
            $toolName = $conn->quote($toolName, PDO::PARAM_STR);
393
394
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
395
            // configuration, but no local tag exists on the said project.
396
            if ('' === $condTool) {
397
                continue;
398
            }
399
400
            $queries[] .= "
401
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
402
                FROM $revisionTable
403
                $pageJoin
404
                $commentJoin
405
                $tagJoin
406
                WHERE $condTool
407
                    AND rev_page = :pageId
408
                $revDateConditions";
409
        }
410
411
        $sql = implode(' UNION ', $queries);
412
        $resultQuery = $this->executeProjectsQuery($project, $sql, [
413
            'pageId' => $page->getId(),
414
        ]);
415
416
        $results = [];
417
418
        while ($row = $resultQuery->fetchAssociative()) {
419
            // Only track tools that they've used at least once
420
            $tool = $row['toolname'];
421
            if ($row['count'] > 0) {
422
                $results[$tool] = [
423
                    'link' => $tools[$tool]['link'],
424
                    'label' => $tools[$tool]['label'] ?? $tool,
425
                    'count' => $row['count'],
426
                ];
427
            }
428
        }
429
430
        // Sort the array by count
431
        uasort($results, function ($a, $b) {
432
            return $b['count'] - $a['count'];
433
        });
434
435
        return $this->setCache($cacheKey, $results);
436
    }
437
}
438