Passed
Pull Request — main (#442)
by MusikAnimal
08:15 queued 04:14
created

ArticleInfoRepository::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 13
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nc 1
nop 8
dl 0
loc 13
rs 10
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Edit;
8
use App\Model\Page;
9
use Doctrine\DBAL\Driver\ResultStatement;
10
use GuzzleHttp\Client;
11
use Psr\Cache\CacheItemPoolInterface;
12
use Psr\Container\ContainerInterface;
13
use Psr\Log\LoggerInterface;
14
15
/**
16
 * ArticleInfoRepository is responsible for retrieving data about a single
17
 * article on a given wiki.
18
 * @codeCoverageIgnore
19
 */
20
class ArticleInfoRepository extends Repository
21
{
22
    protected EditRepository $editRepo;
23
    protected UserRepository $userRepo;
24
25
    /** @var int Maximum number of revisions to process, as configured via app.max_page_revisions */
26
    protected int $maxPageRevisions;
27
28
    /**
     * Keep references to the Edit and User repositories (needed to build Edit models)
     * and pass the remaining dependencies on to the parent Repository constructor.
     * @param ContainerInterface $container
     * @param CacheItemPoolInterface $cache
     * @param Client $guzzle
     * @param LoggerInterface $logger
     * @param bool $isWMF
     * @param int $queryTimeout
     * @param EditRepository $editRepo
     * @param UserRepository $userRepo
     */
    public function __construct(
        ContainerInterface $container,
        CacheItemPoolInterface $cache,
        Client $guzzle,
        LoggerInterface $logger,
        bool $isWMF,
        int $queryTimeout,
        EditRepository $editRepo,
        UserRepository $userRepo
    ) {
        $this->editRepo = $editRepo;
        $this->userRepo = $userRepo;
        parent::__construct($container, $cache, $guzzle, $logger, $isWMF, $queryTimeout);
    }
51
52
    /**
     * Upper bound on how many revisions of a page will be processed, for performance reasons.
     * The value is read from the 'app.max_page_revisions' parameter the first time it is
     * requested and kept on the instance afterwards.
     * @return int
     */
    public function getMaxPageRevisions(): int
    {
        // getParameter() is not declared on Psr\Container\ContainerInterface. Static analysis suggests
        // the injected container is a sub-type such as Symfony's ContainerInterface (TODO confirm),
        // hence the ignore-call annotation.
        $this->maxPageRevisions ??= (int)$this->container
            ->/** @scrutinizer ignore-call */ getParameter('app.max_page_revisions');

        return $this->maxPageRevisions;
    }
63
64
    /**
     * Build an Edit model for a single revision of the given page, supplying it
     * with the Edit and User repositories held by this repository.
     * @param Page $page
     * @param array $revision
     * @return Edit
     */
    public function getEdit(Page $page, array $revision): Edit
    {
        $edit = new Edit($this->editRepo, $this->userRepo, $page, $revision);

        return $edit;
    }
74
75
    /**
     * Query the edits made to the page by accounts that are, or used to be, in the 'bot' group.
     * The statement is a UNION of former bots (current = '0') and present bots (current = '1').
     * @param Page $page
     * @param false|int $start
     * @param false|int $end
     * @param ?int $limit When not null, only this many of the most recent revisions are considered.
     * @param bool $count Return a count rather than the full set of rows.
     * @return ResultStatement resolving with keys 'count' and 'current', plus 'username'
     *   (one row per user) unless $count is true.
     */
    public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement
    {
        $project = $page->getProject();
        $revisionTable = $project->getTableName('revision');
        $groupsTable = $project->getTableName('user_groups');
        $formerGroupsTable = $project->getTableName('user_former_groups');
        $actorTable = $project->getTableName('actor', 'revision');

        $dateFilter = $this->getDateConditions($start, $end);

        // Per-user breakdown is only selected and grouped when full rows are requested.
        $usernameSelect = $count ? '' : 'actor_name AS username, ';
        $grouping = $count ? '' : 'GROUP BY actor_user';

        $limitSql = null === $limit ? '' : "LIMIT $limit";

        $sql = "SELECT COUNT(DISTINCT rev_id) AS count, $usernameSelect '0' AS current
                FROM (
                    SELECT rev_id, rev_actor, rev_timestamp
                    FROM $revisionTable
                    WHERE rev_page = :pageId
                    ORDER BY rev_timestamp DESC
                    $limitSql
                ) a
                JOIN $actorTable ON actor_id = rev_actor
                LEFT JOIN $formerGroupsTable ON actor_user = ufg_user
                WHERE ufg_group = 'bot' $dateFilter
                $grouping
                UNION
                SELECT COUNT(DISTINCT rev_id) AS count, $usernameSelect '1' AS current
                FROM (
                    SELECT rev_id, rev_actor, rev_timestamp
                    FROM $revisionTable
                    WHERE rev_page = :pageId
                    ORDER BY rev_timestamp DESC
                    $limitSql
                ) a
                JOIN $actorTable ON actor_id = rev_actor
                LEFT JOIN $groupsTable ON actor_user = ug_user
                WHERE ug_group = 'bot' $dateFilter
                $grouping";

        $params = ['pageId' => $page->getId()];

        return $this->executeProjectsQuery($project, $sql, $params);
    }
135
136
    /**
     * Get prior log events for the page of type 'delete', 'move', 'protect' or 'stable'.
     * A cached result is returned when one exists for the same arguments; otherwise the
     * query result is stored via setCache() before being returned.
     * @param Page $page
     * @param false|int $start
     * @param false|int $end
     * @return string[][] Rows, each with keys 'log_action', 'log_type' and 'timestamp'.
     */
    public function getLogEvents(Page $page, $start, $end): array
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        $loggingTable = $project->getTableName('logging', 'logindex');

        $datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp');

        // Both the namespace and the title are bound as parameters instead of being
        // concatenated into the SQL text.
        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
                FROM $loggingTable
                WHERE log_namespace = :namespace
                AND log_title = :title AND log_timestamp > 1 $datesConditions
                AND log_type IN ('delete', 'move', 'protect', 'stable')";

        $params = [
            // Cast to string so the value is compared the same way a quoted literal would be.
            'namespace' => (string)$page->getNamespace(),
            // Spaces are replaced with underscores before matching against log_title.
            'title' => str_replace(' ', '_', $page->getTitle()),
        ];

        $result = $this->executeProjectsQuery($project, $sql, $params)
            ->fetchAllAssociative();

        return $this->setCache($cacheKey, $result);
    }
164
165
    /**
     * Count the rows in categorylinks, templatelinks and imagelinks that originate from the page.
     * A cached result is returned when one exists; otherwise the counts are stored via setCache().
     * @param Page $page
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    public function getTransclusionData(Page $page): array
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        $categorylinksTable = $project->getTableName('categorylinks');
        $templatelinksTable = $project->getTableName('templatelinks');
        $imagelinksTable = $project->getTableName('imagelinks');

        // One row per link type: `key` names the type, `val` holds its count.
        $sql = "(
                    SELECT 'categories' AS `key`, COUNT(*) AS val
                    FROM $categorylinksTable
                    WHERE cl_from = :pageId
                ) UNION (
                    SELECT 'templates' AS `key`, COUNT(*) AS val
                    FROM $templatelinksTable
                    WHERE tl_from = :pageId
                ) UNION (
                    SELECT 'files' AS `key`, COUNT(*) AS val
                    FROM $imagelinksTable
                    WHERE il_from = :pageId
                )";

        $rows = $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()])
            ->fetchAllAssociative();

        $counts = [];
        foreach ($rows as $row) {
            $counts[$row['key']] = (int)$row['val'];
        }

        return $this->setCache($cacheKey, $counts);
    }
202
203
    /**
     * Get the top editors to the page by edit count.
     * A cached result is returned when one exists for the same arguments; otherwise the
     * query result is stored via setCache() before being returned.
     * @param Page $page
     * @param false|int $start
     * @param false|int $end
     * @param int $limit Maximum number of editors to return.
     * @param bool $noBots Exclude editors who are currently in the 'bot' user group.
     * @return array Rows with keys 'username', 'count', 'minor', 'first_timestamp', 'first_revid',
     *   'latest_timestamp' and 'latest_revid', ordered by 'count' descending.
     */
    public function getTopEditorsByEditCount(
        Page $page,
        $start = false,
        $end = false,
        int $limit = 20,
        bool $noBots = false
    ): array {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        // Faster to use revision instead of revision_userindex in this case.
        $revTable = $project->getTableName('revision', '');
        $actorTable = $project->getTableName('actor');

        $dateConditions = $this->getDateConditions($start, $end);

        $sql = "SELECT actor_name AS username,
                    COUNT(rev_id) AS count,
                    SUM(rev_minor_edit) AS minor,
                    MIN(rev_timestamp) AS first_timestamp,
                    MIN(rev_id) AS first_revid,
                    MAX(rev_timestamp) AS latest_timestamp,
                    MAX(rev_id) AS latest_revid
                FROM $revTable
                JOIN $actorTable ON rev_actor = actor_id
                WHERE rev_page = :pageId $dateConditions";

        // Each appended fragment starts with a space so the statement stays well-formed
        // no matter how the preceding fragment ends.
        if ($noBots) {
            $userGroupsTable = $project->getTableName('user_groups');
            $sql .= " AND NOT EXISTS (
                         SELECT 1
                         FROM $userGroupsTable
                         WHERE ug_user = actor_user
                         AND ug_group = 'bot'
                     )";
        }

        $sql .= " GROUP BY actor_id
                 ORDER BY count DESC
                 LIMIT $limit";

        $result = $this->executeProjectsQuery($project, $sql, [
            'pageId' => $page->getId(),
        ])->fetchAllAssociative();

        return $this->setCache($cacheKey, $result);
    }
262
263
    /**
     * Get various basic info used in the API, including the number of revisions, unique authors, initial author
     * and edit count of the initial author. This is combined into one query for better performance. Caching is only
     * applied if it took considerable time to process, because using the gadget, this will get hit for a different page
     * constantly, where the likelihood of cache benefiting us is slim.
     * @param Page $page The page.
     * @return string[]|false false if the page was not found.
     */
    public function getBasicEditingInfo(Page $page)
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $project = $page->getProject();
        $revTable = $project->getTableName('revision');
        $userTable = $project->getTableName('user');
        $pageTable = $project->getTableName('page');
        $actorTable = $project->getTableName('actor');

        $sql = "SELECT *, (
                    SELECT user_editcount
                    FROM $userTable
                    WHERE user_id = author_user_id
                ) AS author_editcount
                FROM (
                    (
                        SELECT COUNT(rev_id) AS num_edits,
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
                            SUM(rev_minor_edit) AS minor_edits
                        FROM $revTable
                        WHERE rev_page = :pageid
                        AND rev_timestamp > 0 # Use rev_timestamp index
                    ) a,
                    (
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
                        #   and the lowest rev_id may not be the first revision.
                        SELECT actor_name AS author,
                               actor_user AS author_user_id,
                               rev_timestamp AS created_at,
                               rev_id AS created_rev_id
                        FROM $revTable
                        JOIN $actorTable ON actor_id = rev_actor
                        WHERE rev_page = :pageid
                        ORDER BY rev_timestamp ASC
                        LIMIT 1
                    ) b,
                    (
                        SELECT rev_timestamp AS modified_at,
                               rev_id AS modified_rev_id
                        FROM $revTable
                        JOIN $pageTable ON page_id = rev_page
                        WHERE rev_page = :pageid
                        AND rev_id = page_latest
                    ) c
                )";
        $params = ['pageid' => $page->getId()];

        // Record the start time with sub-second precision so we can measure how long
        // the query took and decide whether or not to cache the result.
        $startTime = microtime(true);

        /**
         * This query can sometimes take too long to run for pages with tens of thousands
         * of revisions. This query is used by the ArticleInfo gadget, which shows basic
         * data in real-time, so if it takes too long then the user probably didn't even
         * wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery,
         * which will set the max_statement_time to 60 seconds.
         */
        $result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative();

        $elapsedSeconds = microtime(true) - $startTime;

        // If it took over 5 seconds, cache the result for 20 minutes.
        if ($elapsedSeconds > 5) {
            $this->setCache($cacheKey, $result, 'PT20M');
        }

        return $result ?? false;
    }
344
}
345