Passed
Pull Request — main (#442)
by MusikAnimal
07:58 queued 04:01
created

ArticleInfoRepository   A

Complexity

Total Complexity 18

Size/Duplication

Total Lines 323
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 112
dl 0
loc 323
rs 10
c 0
b 0
f 0
wmc 18

8 Methods

Rating   Name   Duplication   Size   Complexity  
A getBotData() 0 50 3
A getTransclusionData() 0 31 3
A getBasicEditingInfo() 0 72 3
A getTopEditorsByEditCount() 0 49 3
A __construct() 0 13 1
A getEdit() 0 3 1
A getMaxPageRevisions() 0 6 2
A getLogEvents() 0 20 2
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Edit;
8
use App\Model\Page;
9
use Doctrine\DBAL\Driver\ResultStatement;
10
use GuzzleHttp\Client;
11
use Psr\Cache\CacheItemPoolInterface;
12
use Psr\Container\ContainerInterface;
13
use Psr\Log\LoggerInterface;
14
15
/**
16
 * ArticleInfoRepository is responsible for retrieving data about a single
17
 * article on a given wiki.
18
 * @codeCoverageIgnore
19
 */
20
class ArticleInfoRepository extends Repository
21
{
22
    protected EditRepository $editRepo;
23
    protected UserRepository $userRepo;
24
25
    /** @var int Maximum number of revisions to process, as configured via app.max_page_revisions */
26
    protected int $maxPageRevisions;
27
28
    /**
     * Store the Edit and User repositories on the instance, then hand the shared
     * dependencies over to the parent Repository constructor.
     * @param ContainerInterface $container
     * @param CacheItemPoolInterface $cache
     * @param Client $guzzle
     * @param LoggerInterface $logger
     * @param bool $isWMF
     * @param int $queryTimeout
     * @param EditRepository $editRepo Used when building Edit models, see getEdit().
     * @param UserRepository $userRepo Used when building Edit models, see getEdit().
     */
    public function __construct(
        ContainerInterface $container,
        CacheItemPoolInterface $cache,
        Client $guzzle,
        LoggerInterface $logger,
        bool $isWMF,
        int $queryTimeout,
        EditRepository $editRepo,
        UserRepository $userRepo
    ) {
        // Assigned before delegating, exactly as the parent call order was originally written.
        $this->editRepo = $editRepo;
        $this->userRepo = $userRepo;

        parent::__construct(
            $container,
            $cache,
            $guzzle,
            $logger,
            $isWMF,
            $queryTimeout
        );
    }
51
52
    /**
     * Performance cap on the number of revisions of a page that will be processed.
     * The value is read from the 'app.max_page_revisions' container parameter on first
     * use and kept on the instance for subsequent calls.
     * @return int
     */
    public function getMaxPageRevisions(): int
    {
        // Already resolved during an earlier call.
        if (isset($this->maxPageRevisions)) {
            return $this->maxPageRevisions;
        }

        // getParameter() is not declared on Psr\Container\ContainerInterface; the injected
        // container is expected to be a sub-type that provides it, hence the annotation.
        $configured = $this->container->/** @scrutinizer ignore-call */ getParameter('app.max_page_revisions');
        $this->maxPageRevisions = (int)$configured;

        return $this->maxPageRevisions;
    }
63
64
    /**
     * Build an Edit model for one revision of the given page, wiring in the
     * Edit and User repositories held by this repository.
     * @param Page $page The page the revision belongs to.
     * @param array $revision Revision data handed straight to the Edit constructor.
     * @return Edit
     */
    public function getEdit(Page $page, array $revision): Edit
    {
        $edit = new Edit($this->editRepo, $this->userRepo, $page, $revision);

        return $edit;
    }
74
75
    /**
     * Count the revisions of the page made by accounts that hold, or used to hold, the 'bot' group.
     *
     * The query is a UNION of two halves: former bots (rows carry current = '0') and current
     * bots (rows carry current = '1'). When a limit is given, each half only looks at that many
     * of the page's most recent revisions.
     * @param Page $page
     * @param false|int $start Forwarded to getDateConditions().
     * @param false|int $end Forwarded to getDateConditions().
     * @param ?int $limit Number of most recent revisions to consider, or null for no limit.
     * @param bool $count Return a count rather than one row per bot account.
     * @return ResultStatement Rows have the keys 'count' and 'current', plus 'username' unless $count is true.
     */
    public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement
    {
        $project = $page->getProject();
        $revisionTable = $project->getTableName('revision');
        $groupsTable = $project->getTableName('user_groups');
        $formerGroupsTable = $project->getTableName('user_former_groups');
        $actorTable = $project->getTableName('actor', 'revision');

        $dateFilter = $this->getDateConditions($start, $end);

        // A per-account breakdown needs the username column and a grouping; a plain count needs neither.
        $usernameColumn = $count ? '' : 'actor_name AS username, ';
        $grouping = $count ? '' : 'GROUP BY actor_user';

        // $limit is typed as ?int, so interpolating it cannot inject arbitrary SQL.
        $limitSql = null === $limit ? '' : "LIMIT $limit";

        $sql = "SELECT COUNT(DISTINCT rev_id) AS count, {$usernameColumn} '0' AS current
                FROM (
                    SELECT rev_id, rev_actor, rev_timestamp
                    FROM {$revisionTable}
                    WHERE rev_page = :pageId
                    ORDER BY rev_timestamp DESC
                    {$limitSql}
                ) a
                JOIN {$actorTable} ON actor_id = rev_actor
                LEFT JOIN {$formerGroupsTable} ON actor_user = ufg_user
                WHERE ufg_group = 'bot' {$dateFilter}
                {$grouping}
                UNION
                SELECT COUNT(DISTINCT rev_id) AS count, {$usernameColumn} '1' AS current
                FROM (
                    SELECT rev_id, rev_actor, rev_timestamp
                    FROM {$revisionTable}
                    WHERE rev_page = :pageId
                    ORDER BY rev_timestamp DESC
                    {$limitSql}
                ) a
                JOIN {$actorTable} ON actor_id = rev_actor
                LEFT JOIN {$groupsTable} ON actor_user = ug_user
                WHERE ug_group = 'bot' {$dateFilter}
                {$grouping}";

        $params = ['pageId' => $page->getId()];

        return $this->executeProjectsQuery($project, $sql, $params);
    }
135
136
    /**
     * Get prior deletions, page moves, protections and 'stable' log entries for the page.
     * Results are cached under a key derived from the arguments.
     * @param Page $page
     * @param false|int $start Forwarded to getDateConditions() against log_timestamp.
     * @param false|int $end Forwarded to getDateConditions() against log_timestamp.
     * @return array[] Each entry has the keys 'log_action', 'log_type' and 'timestamp'.
     */
    public function getLogEvents(Page $page, $start, $end): array
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        $loggingTable = $project->getTableName('logging', 'logindex');

        $datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp');

        // Both the namespace and the title are bound as parameters rather than being
        // concatenated into the statement, so neither value can alter the SQL itself.
        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
                FROM $loggingTable
                WHERE log_namespace = :namespace
                AND log_title = :title AND log_timestamp > 1 $datesConditions
                AND log_type IN ('delete', 'move', 'protect', 'stable')";

        $params = [
            // Cast to string so the comparison matches the quoted literal used previously.
            'namespace' => (string)$page->getNamespace(),
            // The query looks titles up with underscores in place of spaces.
            'title' => str_replace(' ', '_', $page->getTitle()),
        ];

        $result = $this->executeProjectsQuery($project, $sql, $params)
            ->fetchAllAssociative();

        return $this->setCache($cacheKey, $result);
    }
164
165
    /**
     * Count the category links, template links and image links recorded for the page.
     * Results are cached under a key derived from the arguments.
     * @param Page $page
     * @return array With keys 'categories', 'templates' and 'files', each mapped to an int count.
     */
    public function getTransclusionData(Page $page): array
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        $categorylinksTable = $project->getTableName('categorylinks');
        $templatelinksTable = $project->getTableName('templatelinks');
        $imagelinksTable = $project->getTableName('imagelinks');

        // One row per link type; the literal in the `key` column becomes the array key below.
        $sql = "(
                    SELECT 'categories' AS `key`, COUNT(*) AS val
                    FROM $categorylinksTable
                    WHERE cl_from = :pageId
                ) UNION (
                    SELECT 'templates' AS `key`, COUNT(*) AS val
                    FROM $templatelinksTable
                    WHERE tl_from = :pageId
                ) UNION (
                    SELECT 'files' AS `key`, COUNT(*) AS val
                    FROM $imagelinksTable
                    WHERE il_from = :pageId
                )";

        $rows = $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()])
            ->fetchAllAssociative();

        $counts = [];
        foreach ($rows as $row) {
            $counts[$row['key']] = (int)$row['val'];
        }

        return $this->setCache($cacheKey, $counts);
    }
202
203
    /**
     * Get the top editors to the page by edit count.
     * Results are cached under a key derived from the arguments.
     * @param Page $page
     * @param false|int $start Forwarded to getDateConditions().
     * @param false|int $end Forwarded to getDateConditions().
     * @param int $limit Maximum number of editors to return.
     * @param bool $noBots Exclude accounts that currently belong to the 'bot' user group.
     * @return array[] Rows ordered by 'count' descending, each with the keys 'username', 'count', 'minor',
     *   'first_timestamp', 'first_revid', 'latest_timestamp' and 'latest_revid'.
     */
    public function getTopEditorsByEditCount(
        Page $page,
        $start = false,
        $end = false,
        int $limit = 20,
        bool $noBots = false
    ): array {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        // Faster to use revision instead of revision_userindex in this case.
        $revTable = $project->getTableName('revision', '');
        $actorTable = $project->getTableName('actor');

        $dateConditions = $this->getDateConditions($start, $end);

        $sql = "SELECT actor_name AS username,
                    COUNT(rev_id) AS count,
                    SUM(rev_minor_edit) AS minor,
                    MIN(rev_timestamp) AS first_timestamp,
                    MIN(rev_id) AS first_revid,
                    MAX(rev_timestamp) AS latest_timestamp,
                    MAX(rev_id) AS latest_revid
                FROM $revTable
                JOIN $actorTable ON rev_actor = actor_id
                WHERE rev_page = :pageId $dateConditions";

        // Every appended fragment begins with whitespace so that it can never fuse with
        // whatever token ends the previous fragment (e.g. the tail of $dateConditions).
        if ($noBots) {
            $userGroupsTable = $project->getTableName('user_groups');
            $sql .= "
                AND NOT EXISTS (
                    SELECT 1
                    FROM $userGroupsTable
                    WHERE ug_user = actor_user
                    AND ug_group = 'bot'
                )";
        }

        // $limit is typed as int, so interpolating it cannot inject arbitrary SQL.
        $sql .= "
                GROUP BY actor_id
                ORDER BY count DESC
                LIMIT $limit";

        $result = $this->executeProjectsQuery($project, $sql, [
            'pageId' => $page->getId(),
        ])->fetchAllAssociative();

        return $this->setCache($cacheKey, $result);
    }
262
263
    /**
     * Fetch the basic figures used by the API in a single query: number of revisions, number of
     * distinct editors, minor edit count, the initial author and their edit count, and the creation
     * and latest-revision timestamps and IDs.
     *
     * The result is only cached when the query was slow. The gadget hits this for a different page
     * constantly, so the likelihood of the cache benefiting fast lookups is slim.
     * @param Page $page The page.
     * @return string[]|false false if the page was not found.
     */
    public function getBasicEditingInfo(Page $page)
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        $revTable = $project->getTableName('revision');
        $userTable = $project->getTableName('user');
        $pageTable = $project->getTableName('page');
        $actorTable = $project->getTableName('actor');

        $sql = "SELECT *, (
                    SELECT user_editcount
                    FROM $userTable
                    WHERE user_id = author_user_id
                ) AS author_editcount
                FROM (
                    (
                        SELECT COUNT(rev_id) AS num_edits,
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
                            SUM(rev_minor_edit) AS minor_edits
                        FROM $revTable
                        WHERE rev_page = :pageid
                        AND rev_timestamp > 0 # Use rev_timestamp index
                    ) a,
                    (
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
                        #   and the lowest rev_id may not be the first revision.
                        SELECT actor_name AS author,
                               actor_user AS author_user_id,
                               rev_timestamp AS created_at,
                               rev_id AS created_rev_id
                        FROM $revTable
                        JOIN $actorTable ON actor_id = rev_actor
                        WHERE rev_page = :pageid
                        ORDER BY rev_timestamp ASC
                        LIMIT 1
                    ) b,
                    (
                        SELECT rev_timestamp AS modified_at,
                               rev_id AS modified_rev_id
                        FROM $revTable
                        JOIN $pageTable ON page_id = rev_page
                        WHERE rev_page = :pageid
                        AND rev_id = page_latest
                    ) c
                )";
        $params = ['pageid' => $page->getId()];

        // Note when the query starts so its duration can decide whether the result gets cached.
        $startedAt = time();

        /**
         * This query can sometimes take too long to run for pages with tens of thousands
         * of revisions. It is used by the ArticleInfo gadget, which shows basic data in
         * real-time, so if it takes too long then the user probably didn't even wait to
         * see the result. Passing 60 as the last argument to executeProjectsQuery sets
         * the max_statement_time to 60 seconds.
         */
        $result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative();

        $elapsedSeconds = time() - $startedAt;

        // If it took over 5 seconds, cache the result for 20 minutes.
        if ($elapsedSeconds > 5) {
            $this->setCache($cacheKey, $result, 'PT20M');
        }

        return $result ?? false;
    }
344
}
345