Passed
Push — pageinfo ( 7c1380 )
by MusikAnimal
06:18
created

PageInfoRepository   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 413
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 161
dl 0
loc 413
rs 10
c 0
b 0
f 0
wmc 24

9 Methods

Rating   Name   Duplication   Size   Complexity  
A getLogEvents() 0 20 2
A getEdit() 0 3 1
A getMaxPageRevisions() 0 6 2
B getAutoEditsCounts() 0 62 6
A __construct() 0 27 1
A getBasicEditingInfo() 0 74 3
A getTopEditorsByEditCount() 0 49 3
A getBotData() 0 50 3
A getTransclusionData() 0 31 3
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Helper\AutomatedEditsHelper;
8
use App\Model\Edit;
9
use App\Model\Page;
10
use Doctrine\DBAL\Driver\ResultStatement;
11
use Doctrine\Persistence\ManagerRegistry;
12
use GuzzleHttp\Client;
13
use PDO;
14
use Psr\Cache\CacheItemPoolInterface;
15
use Psr\Log\LoggerInterface;
16
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
17
use Symfony\Component\HttpFoundation\RequestStack;
18
19
/**
20
 * PageInfoRepository is responsible for retrieving data about a single page on a given wiki.
21
 * @codeCoverageIgnore
22
 */
23
class PageInfoRepository extends AutoEditsRepository
24
{
25
    protected EditRepository $editRepo;
26
    protected UserRepository $userRepo;
27
28
    /** @var int Maximum number of revisions to process, as configured via APP_MAX_PAGE_REVISIONS */
29
    protected int $maxPageRevisions;
30
31
    /**
32
     * @param ManagerRegistry $managerRegistry
33
     * @param CacheItemPoolInterface $cache
34
     * @param Client $guzzle
35
     * @param LoggerInterface $logger
36
     * @param ParameterBagInterface $parameterBag
37
     * @param bool $isWMF
38
     * @param int $queryTimeout
39
     * @param EditRepository $editRepo
40
     * @param UserRepository $userRepo
41
     */
42
    public function __construct(
43
        ManagerRegistry $managerRegistry,
44
        CacheItemPoolInterface $cache,
45
        Client $guzzle,
46
        LoggerInterface $logger,
47
        ParameterBagInterface $parameterBag,
48
        bool $isWMF,
49
        int $queryTimeout,
50
        EditRepository $editRepo,
51
        UserRepository $userRepo,
52
        ProjectRepository $projectRepo,
53
        AutomatedEditsHelper $autoEditsHelper,
54
        RequestStack $requestStack
55
    ) {
56
        $this->editRepo = $editRepo;
57
        $this->userRepo = $userRepo;
58
        parent::__construct(
59
            $managerRegistry,
60
            $cache,
61
            $guzzle,
62
            $logger,
63
            $parameterBag,
64
            $isWMF,
65
            $queryTimeout,
66
            $projectRepo,
67
            $autoEditsHelper,
68
            $requestStack
69
        );
70
    }
71
72
    /**
73
     * Get the performance maximum on the number of revisions to process.
74
     * @return int
75
     */
76
    public function getMaxPageRevisions(): int
77
    {
78
        if (!isset($this->maxPageRevisions)) {
79
            $this->maxPageRevisions = (int)$this->parameterBag->get('app.max_page_revisions');
80
        }
81
        return $this->maxPageRevisions;
82
    }
83
84
    /**
85
     * Factory to instantiate a new Edit for the given revision.
86
     * @param Page $page
87
     * @param array $revision
88
     * @return Edit
89
     */
90
    public function getEdit(Page $page, array $revision): Edit
91
    {
92
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
93
    }
94
95
    /**
96
     * Get the number of edits made to the page by bots or former bots.
97
     * @param Page $page
98
     * @param false|int $start
99
     * @param false|int $end
100
     * @param ?int $limit
101
     * @param bool $count Return a count rather than the full set of rows.
102
     * @return ResultStatement resolving with keys 'count', 'username' and 'current'.
103
     */
104
    public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement
105
    {
106
        $project = $page->getProject();
107
        $revTable = $project->getTableName('revision');
108
        $userGroupsTable = $project->getTableName('user_groups');
109
        $userFormerGroupsTable = $project->getTableName('user_former_groups');
110
        $actorTable = $project->getTableName('actor', 'revision');
111
112
        $datesConditions = $this->getDateConditions($start, $end);
113
114
        if ($count) {
115
            $actorSelect = '';
116
            $groupBy = '';
117
        } else {
118
            $actorSelect = 'actor_name AS username, ';
119
            $groupBy = 'GROUP BY actor_user';
120
        }
121
122
        $limitClause = '';
123
        if (null !== $limit) {
124
            $limitClause = "LIMIT $limit";
125
        }
126
127
        $sql = "SELECT COUNT(DISTINCT rev_id) AS `count`, $actorSelect '0' AS `current`
128
                FROM (
129
                    SELECT rev_id, rev_actor, rev_timestamp
130
                    FROM $revTable
131
                    WHERE rev_page = :pageId
132
                    ORDER BY rev_timestamp DESC
133
                    $limitClause
134
                ) a
135
                JOIN $actorTable ON actor_id = rev_actor
136
                LEFT JOIN $userFormerGroupsTable ON actor_user = ufg_user
137
                WHERE ufg_group = 'bot' $datesConditions
138
                $groupBy
139
                UNION
140
                SELECT COUNT(DISTINCT rev_id) AS count, $actorSelect '1' AS current
141
                FROM (
142
                    SELECT rev_id, rev_actor, rev_timestamp
143
                    FROM $revTable
144
                    WHERE rev_page = :pageId
145
                    ORDER BY rev_timestamp DESC
146
                    $limitClause
147
                ) a
148
                JOIN $actorTable ON actor_id = rev_actor
149
                LEFT JOIN $userGroupsTable ON actor_user = ug_user
150
                WHERE ug_group = 'bot' $datesConditions
151
                $groupBy";
152
153
        return $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()]);
154
    }
155
156
    /**
157
     * Get prior deletions, page moves, and protections to the page.
158
     * @param Page $page
159
     * @param false|int $start
160
     * @param false|int $end
161
     * @return string[] each entry with keys 'log_action', 'log_type' and 'timestamp'.
162
     */
163
    public function getLogEvents(Page $page, $start, $end): array
164
    {
165
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
166
        if ($this->cache->hasItem($cacheKey)) {
167
            return $this->cache->getItem($cacheKey)->get();
168
        }
169
        $loggingTable = $page->getProject()->getTableName('logging', 'logindex');
170
171
        $datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp');
172
173
        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
174
                FROM $loggingTable
175
                WHERE log_namespace = '" . $page->getNamespace() . "'
176
                AND log_title = :title AND log_timestamp > 1 $datesConditions
177
                AND log_type IN ('delete', 'move', 'protect', 'stable')";
178
        $title = str_replace(' ', '_', $page->getTitle());
179
180
        $result = $this->executeProjectsQuery($page->getProject(), $sql, ['title' => $title])
181
            ->fetchAllAssociative();
182
        return $this->setCache($cacheKey, $result);
183
    }
184
185
    /**
186
     * Get the number of categories, templates, and files that are on the page.
187
     * @param Page $page
188
     * @return array With keys 'categories', 'templates' and 'files'.
189
     */
190
    public function getTransclusionData(Page $page): array
191
    {
192
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
193
        if ($this->cache->hasItem($cacheKey)) {
194
            return $this->cache->getItem($cacheKey)->get();
195
        }
196
197
        $categorylinksTable = $page->getProject()->getTableName('categorylinks');
198
        $templatelinksTable = $page->getProject()->getTableName('templatelinks');
199
        $imagelinksTable = $page->getProject()->getTableName('imagelinks');
200
        $sql = "(
201
                    SELECT 'categories' AS `key`, COUNT(*) AS val
202
                    FROM $categorylinksTable
203
                    WHERE cl_from = :pageId
204
                ) UNION (
205
                    SELECT 'templates' AS `key`, COUNT(*) AS val
206
                    FROM $templatelinksTable
207
                    WHERE tl_from = :pageId
208
                ) UNION (
209
                    SELECT 'files' AS `key`, COUNT(*) AS val
210
                    FROM $imagelinksTable
211
                    WHERE il_from = :pageId
212
                )";
213
        $resultQuery = $this->executeProjectsQuery($page->getProject(), $sql, ['pageId' => $page->getId()]);
214
        $transclusionCounts = [];
215
216
        while ($result = $resultQuery->fetchAssociative()) {
217
            $transclusionCounts[$result['key']] = (int)$result['val'];
218
        }
219
220
        return $this->setCache($cacheKey, $transclusionCounts);
221
    }
222
223
    /**
224
     * Get the top editors to the page by edit count.
225
     * @param Page $page
226
     * @param false|int $start
227
     * @param false|int $end
228
     * @param int $limit
229
     * @param bool $noBots
230
     * @return array
231
     */
232
    public function getTopEditorsByEditCount(
233
        Page $page,
234
        $start = false,
235
        $end = false,
236
        int $limit = 20,
237
        bool $noBots = false
238
    ): array {
239
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
240
        if ($this->cache->hasItem($cacheKey)) {
241
            return $this->cache->getItem($cacheKey)->get();
242
        }
243
244
        $project = $page->getProject();
245
        // Faster to use revision instead of revision_userindex in this case.
246
        $revTable = $project->getTableName('revision', '');
247
        $actorTable = $project->getTableName('actor');
248
249
        $dateConditions = $this->getDateConditions($start, $end);
250
251
        $sql = "SELECT actor_name AS username,
252
                    COUNT(rev_id) AS count,
253
                    SUM(rev_minor_edit) AS minor,
254
                    MIN(rev_timestamp) AS first_timestamp,
255
                    MIN(rev_id) AS first_revid,
256
                    MAX(rev_timestamp) AS latest_timestamp,
257
                    MAX(rev_id) AS latest_revid
258
                FROM $revTable
259
                JOIN $actorTable ON rev_actor = actor_id
260
                WHERE rev_page = :pageId $dateConditions";
261
262
        if ($noBots) {
263
            $userGroupsTable = $project->getTableName('user_groups');
264
            $sql .= "AND NOT EXISTS (
265
                         SELECT 1
266
                         FROM $userGroupsTable
267
                         WHERE ug_user = actor_user
268
                         AND ug_group = 'bot'
269
                     )";
270
        }
271
272
        $sql .= "GROUP BY actor_id
273
                 ORDER BY count DESC
274
                 LIMIT $limit";
275
276
        $result = $this->executeProjectsQuery($project, $sql, [
277
            'pageId' => $page->getId(),
278
        ])->fetchAllAssociative();
279
280
        return $this->setCache($cacheKey, $result);
281
    }
282
283
    /**
284
     * Get various basic info used in the API, including the number of revisions, unique authors, initial author
285
     * and edit count of the initial author. This is combined into one query for better performance. Caching is only
286
     * applied if it took considerable time to process, because using the gadget, this will get hit for a different page
287
     * constantly, where the likelihood of cache benefiting us is slim.
288
     * @param Page $page The page.
289
     * @return string[]|false false if the page was not found.
290
     */
291
    public function getBasicEditingInfo(Page $page)
292
    {
293
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
294
        if ($this->cache->hasItem($cacheKey)) {
295
            return $this->cache->getItem($cacheKey)->get();
296
        }
297
298
        $project = $page->getProject();
299
        $revTable = $project->getTableName('revision');
300
        $userTable = $project->getTableName('user');
301
        $pageTable = $project->getTableName('page');
302
        $actorTable = $project->getTableName('actor');
303
304
        $sql = "SELECT *, (
305
                    SELECT user_editcount
306
                    FROM $userTable
307
                    WHERE user_id = author_user_id
308
                ) AS author_editcount
309
                FROM (
310
                    (
311
                        SELECT COUNT(rev_id) AS num_edits,
312
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
313
                            SUM(actor_user IS NULL) AS ip_edits,
314
                            SUM(rev_minor_edit) AS minor_edits
315
                        FROM $revTable
316
                        JOIN $actorTable ON actor_id = rev_actor
317
                        WHERE rev_page = :pageid
318
                        AND rev_timestamp > 0 # Use rev_timestamp index
319
                    ) a,
320
                    (
321
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
322
                        #   and the lowest rev_id may not be the first revision.
323
                        SELECT actor_name AS author,
324
                               actor_user AS author_user_id,
325
                               rev_timestamp AS created_at,
326
                               rev_id AS created_rev_id
327
                        FROM $revTable
328
                        JOIN $actorTable ON actor_id = rev_actor
329
                        WHERE rev_page = :pageid
330
                        ORDER BY rev_timestamp ASC
331
                        LIMIT 1
332
                    ) b,
333
                    (
334
                        SELECT rev_timestamp AS modified_at,
335
                               rev_id AS modified_rev_id
336
                        FROM $revTable
337
                        JOIN $pageTable ON page_id = rev_page
338
                        WHERE rev_page = :pageid
339
                        AND rev_id = page_latest
340
                    ) c
341
                )";
342
        $params = ['pageid' => $page->getId()];
343
344
        // Get current time so we can compare timestamps
345
        // and decide whether or to cache the result.
346
        $time1 = time();
347
348
        /**
349
         * This query can sometimes take too long to run for pages with tens of thousands
350
         * of revisions. This query is used by the PageInfo gadget, which shows basic
351
         * data in real-time, so if it takes too long than the user probably didn't even
352
         * wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery,
353
         * which will set the max_statement_time to 60 seconds.
354
         */
355
        $result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative();
356
357
        $time2 = time();
358
359
        // If it took over 5 seconds, cache the result for 20 minutes.
360
        if ($time2 - $time1 > 5) {
361
            $this->setCache($cacheKey, $result, 'PT20M');
362
        }
363
364
        return $result ?? false;
365
    }
366
367
    /**
368
     * Get counts of (semi-)automated tools that were used to edit the page.
369
     * @param Page $page
370
     * @param $start
371
     * @param $end
372
     * @return array
373
     */
374
    public function getAutoEditsCounts(Page $page, $start, $end): array
375
    {
376
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
377
        if ($this->cache->hasItem($cacheKey)) {
378
            return $this->cache->getItem($cacheKey)->get();
379
        }
380
381
        $project = $page->getProject();
382
        $tools = $this->getTools($project);
383
        $queries = [];
384
        $revisionTable = $project->getTableName('revision', '');
385
        $pageTable = $project->getTableName('page');
386
        $pageJoin = "LEFT JOIN $pageTable ON rev_page = page_id";
387
        $revDateConditions = $this->getDateConditions($start, $end);
388
        $conn = $this->getProjectsConnection($project);
389
390
        foreach ($tools as $toolName => $values) {
391
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
392
            $toolName = $conn->quote($toolName, PDO::PARAM_STR);
393
394
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
395
            // configuration, but no local tag exists on the said project.
396
            if ('' === $condTool) {
397
                continue;
398
            }
399
400
            $queries[] .= "
401
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
402
                FROM $revisionTable
403
                $pageJoin
404
                $commentJoin
405
                $tagJoin
406
                WHERE $condTool
407
                    AND rev_page = :pageId
408
                $revDateConditions";
409
        }
410
411
        $sql = implode(' UNION ', $queries);
412
        $resultQuery = $this->executeProjectsQuery($project, $sql, [
413
            'pageId' => $page->getId(),
414
        ]);
415
416
        $results = [];
417
418
        while ($row = $resultQuery->fetchAssociative()) {
419
            // Only track tools that they've used at least once
420
            $tool = $row['toolname'];
421
            if ($row['count'] > 0) {
422
                $results[$tool] = [
423
                    'link' => $tools[$tool]['link'],
424
                    'label' => $tools[$tool]['label'] ?? $tool,
425
                    'count' => $row['count'],
426
                ];
427
            }
428
        }
429
430
        // Sort the array by count
431
        uasort($results, function ($a, $b) {
432
            return $b['count'] - $a['count'];
433
        });
434
435
        return $this->setCache($cacheKey, $results);
436
    }
437
}
438