Passed
Push — swagger-docs ( 6837d7...7bc867 )
by MusikAnimal
11:22
created

ArticleInfoRepository::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 27
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 13
nc 1
nop 12
dl 0
loc 27
rs 9.8333
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Helper\AutomatedEditsHelper;
8
use App\Model\Edit;
9
use App\Model\Page;
10
use Doctrine\DBAL\Driver\ResultStatement;
11
use Doctrine\Persistence\ManagerRegistry;
12
use GuzzleHttp\Client;
13
use PDO;
14
use Psr\Cache\CacheItemPoolInterface;
15
use Psr\Log\LoggerInterface;
16
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
17
use Symfony\Component\HttpFoundation\RequestStack;
18
19
/**
20
 * ArticleInfoRepository is responsible for retrieving data about a single
21
 * article on a given wiki.
22
 * @codeCoverageIgnore
23
 */
24
class ArticleInfoRepository extends AutoEditsRepository
25
{
26
    protected EditRepository $editRepo;
27
    protected UserRepository $userRepo;
28
29
    /** @var int Maximum number of revisions to process, as configured via APP_MAX_PAGE_REVISIONS */
30
    protected int $maxPageRevisions;
31
32
    /**
33
     * @param ManagerRegistry $managerRegistry
34
     * @param CacheItemPoolInterface $cache
35
     * @param Client $guzzle
36
     * @param LoggerInterface $logger
37
     * @param ParameterBagInterface $parameterBag
38
     * @param bool $isWMF
39
     * @param int $queryTimeout
40
     * @param EditRepository $editRepo
41
     * @param UserRepository $userRepo
42
     */
43
    public function __construct(
44
        ManagerRegistry $managerRegistry,
45
        CacheItemPoolInterface $cache,
46
        Client $guzzle,
47
        LoggerInterface $logger,
48
        ParameterBagInterface $parameterBag,
49
        bool $isWMF,
50
        int $queryTimeout,
51
        EditRepository $editRepo,
52
        UserRepository $userRepo,
53
        ProjectRepository $projectRepo,
54
        AutomatedEditsHelper $autoEditsHelper,
55
        RequestStack $requestStack
56
    ) {
57
        $this->editRepo = $editRepo;
58
        $this->userRepo = $userRepo;
59
        parent::__construct(
60
            $managerRegistry,
61
            $cache,
62
            $guzzle,
63
            $logger,
64
            $parameterBag,
65
            $isWMF,
66
            $queryTimeout,
67
            $projectRepo,
68
            $autoEditsHelper,
69
            $requestStack
70
        );
71
    }
72
73
    /**
74
     * Get the performance maximum on the number of revisions to process.
75
     * @return int
76
     */
77
    public function getMaxPageRevisions(): int
78
    {
79
        if (!isset($this->maxPageRevisions)) {
80
            $this->maxPageRevisions = (int)$this->parameterBag->get('app.max_page_revisions');
81
        }
82
        return $this->maxPageRevisions;
83
    }
84
85
    /**
86
     * Factory to instantiate a new Edit for the given revision.
87
     * @param Page $page
88
     * @param array $revision
89
     * @return Edit
90
     */
91
    public function getEdit(Page $page, array $revision): Edit
92
    {
93
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
94
    }
95
96
    /**
97
     * Get the number of edits made to the page by bots or former bots.
98
     * @param Page $page
99
     * @param false|int $start
100
     * @param false|int $end
101
     * @param ?int $limit
102
     * @param bool $count Return a count rather than the full set of rows.
103
     * @return ResultStatement resolving with keys 'count', 'username' and 'current'.
104
     */
105
    public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement
106
    {
107
        $project = $page->getProject();
108
        $revTable = $project->getTableName('revision');
109
        $userGroupsTable = $project->getTableName('user_groups');
110
        $userFormerGroupsTable = $project->getTableName('user_former_groups');
111
        $actorTable = $project->getTableName('actor', 'revision');
112
113
        $datesConditions = $this->getDateConditions($start, $end);
114
115
        if ($count) {
116
            $actorSelect = '';
117
            $groupBy = '';
118
        } else {
119
            $actorSelect = 'actor_name AS username, ';
120
            $groupBy = 'GROUP BY actor_user';
121
        }
122
123
        $limitClause = '';
124
        if (null !== $limit) {
125
            $limitClause = "LIMIT $limit";
126
        }
127
128
        $sql = "SELECT COUNT(DISTINCT rev_id) AS `count`, $actorSelect '0' AS `current`
129
                FROM (
130
                    SELECT rev_id, rev_actor, rev_timestamp
131
                    FROM $revTable
132
                    WHERE rev_page = :pageId
133
                    ORDER BY rev_timestamp DESC
134
                    $limitClause
135
                ) a
136
                JOIN $actorTable ON actor_id = rev_actor
137
                LEFT JOIN $userFormerGroupsTable ON actor_user = ufg_user
138
                WHERE ufg_group = 'bot' $datesConditions
139
                $groupBy
140
                UNION
141
                SELECT COUNT(DISTINCT rev_id) AS count, $actorSelect '1' AS current
142
                FROM (
143
                    SELECT rev_id, rev_actor, rev_timestamp
144
                    FROM $revTable
145
                    WHERE rev_page = :pageId
146
                    ORDER BY rev_timestamp DESC
147
                    $limitClause
148
                ) a
149
                JOIN $actorTable ON actor_id = rev_actor
150
                LEFT JOIN $userGroupsTable ON actor_user = ug_user
151
                WHERE ug_group = 'bot' $datesConditions
152
                $groupBy";
153
154
        return $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()]);
155
    }
156
157
    /**
158
     * Get prior deletions, page moves, and protections to the page.
159
     * @param Page $page
160
     * @param false|int $start
161
     * @param false|int $end
162
     * @return string[] each entry with keys 'log_action', 'log_type' and 'timestamp'.
163
     */
164
    public function getLogEvents(Page $page, $start, $end): array
165
    {
166
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
167
        if ($this->cache->hasItem($cacheKey)) {
168
            return $this->cache->getItem($cacheKey)->get();
169
        }
170
        $loggingTable = $page->getProject()->getTableName('logging', 'logindex');
171
172
        $datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp');
173
174
        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
175
                FROM $loggingTable
176
                WHERE log_namespace = '" . $page->getNamespace() . "'
177
                AND log_title = :title AND log_timestamp > 1 $datesConditions
178
                AND log_type IN ('delete', 'move', 'protect', 'stable')";
179
        $title = str_replace(' ', '_', $page->getTitle());
180
181
        $result = $this->executeProjectsQuery($page->getProject(), $sql, ['title' => $title])
182
            ->fetchAllAssociative();
183
        return $this->setCache($cacheKey, $result);
184
    }
185
186
    /**
187
     * Get the number of categories, templates, and files that are on the page.
188
     * @param Page $page
189
     * @return array With keys 'categories', 'templates' and 'files'.
190
     */
191
    public function getTransclusionData(Page $page): array
192
    {
193
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
194
        if ($this->cache->hasItem($cacheKey)) {
195
            return $this->cache->getItem($cacheKey)->get();
196
        }
197
198
        $categorylinksTable = $page->getProject()->getTableName('categorylinks');
199
        $templatelinksTable = $page->getProject()->getTableName('templatelinks');
200
        $imagelinksTable = $page->getProject()->getTableName('imagelinks');
201
        $sql = "(
202
                    SELECT 'categories' AS `key`, COUNT(*) AS val
203
                    FROM $categorylinksTable
204
                    WHERE cl_from = :pageId
205
                ) UNION (
206
                    SELECT 'templates' AS `key`, COUNT(*) AS val
207
                    FROM $templatelinksTable
208
                    WHERE tl_from = :pageId
209
                ) UNION (
210
                    SELECT 'files' AS `key`, COUNT(*) AS val
211
                    FROM $imagelinksTable
212
                    WHERE il_from = :pageId
213
                )";
214
        $resultQuery = $this->executeProjectsQuery($page->getProject(), $sql, ['pageId' => $page->getId()]);
215
        $transclusionCounts = [];
216
217
        while ($result = $resultQuery->fetchAssociative()) {
218
            $transclusionCounts[$result['key']] = (int)$result['val'];
219
        }
220
221
        return $this->setCache($cacheKey, $transclusionCounts);
222
    }
223
224
    /**
225
     * Get the top editors to the page by edit count.
226
     * @param Page $page
227
     * @param false|int $start
228
     * @param false|int $end
229
     * @param int $limit
230
     * @param bool $noBots
231
     * @return array
232
     */
233
    public function getTopEditorsByEditCount(
234
        Page $page,
235
        $start = false,
236
        $end = false,
237
        int $limit = 20,
238
        bool $noBots = false
239
    ): array {
240
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
241
        if ($this->cache->hasItem($cacheKey)) {
242
            return $this->cache->getItem($cacheKey)->get();
243
        }
244
245
        $project = $page->getProject();
246
        // Faster to use revision instead of revision_userindex in this case.
247
        $revTable = $project->getTableName('revision', '');
248
        $actorTable = $project->getTableName('actor');
249
250
        $dateConditions = $this->getDateConditions($start, $end);
251
252
        $sql = "SELECT actor_name AS username,
253
                    COUNT(rev_id) AS count,
254
                    SUM(rev_minor_edit) AS minor,
255
                    MIN(rev_timestamp) AS first_timestamp,
256
                    MIN(rev_id) AS first_revid,
257
                    MAX(rev_timestamp) AS latest_timestamp,
258
                    MAX(rev_id) AS latest_revid
259
                FROM $revTable
260
                JOIN $actorTable ON rev_actor = actor_id
261
                WHERE rev_page = :pageId $dateConditions";
262
263
        if ($noBots) {
264
            $userGroupsTable = $project->getTableName('user_groups');
265
            $sql .= "AND NOT EXISTS (
266
                         SELECT 1
267
                         FROM $userGroupsTable
268
                         WHERE ug_user = actor_user
269
                         AND ug_group = 'bot'
270
                     )";
271
        }
272
273
        $sql .= "GROUP BY actor_id
274
                 ORDER BY count DESC
275
                 LIMIT $limit";
276
277
        $result = $this->executeProjectsQuery($project, $sql, [
278
            'pageId' => $page->getId(),
279
        ])->fetchAllAssociative();
280
281
        return $this->setCache($cacheKey, $result);
282
    }
283
284
    /**
285
     * Get various basic info used in the API, including the number of revisions, unique authors, initial author
286
     * and edit count of the initial author. This is combined into one query for better performance. Caching is only
287
     * applied if it took considerable time to process, because using the gadget, this will get hit for a different page
288
     * constantly, where the likelihood of cache benefiting us is slim.
289
     * @param Page $page The page.
290
     * @return string[]|false false if the page was not found.
291
     */
292
    public function getBasicEditingInfo(Page $page)
293
    {
294
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
295
        if ($this->cache->hasItem($cacheKey)) {
296
            return $this->cache->getItem($cacheKey)->get();
297
        }
298
299
        $project = $page->getProject();
300
        $revTable = $project->getTableName('revision');
301
        $userTable = $project->getTableName('user');
302
        $pageTable = $project->getTableName('page');
303
        $actorTable = $project->getTableName('actor');
304
305
        $sql = "SELECT *, (
306
                    SELECT user_editcount
307
                    FROM $userTable
308
                    WHERE user_id = author_user_id
309
                ) AS author_editcount
310
                FROM (
311
                    (
312
                        SELECT COUNT(rev_id) AS num_edits,
313
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
314
                            SUM(actor_user IS NULL) AS ip_edits,
315
                            SUM(rev_minor_edit) AS minor_edits
316
                        FROM $revTable
317
                        JOIN $actorTable ON actor_id = rev_actor
318
                        WHERE rev_page = :pageid
319
                        AND rev_timestamp > 0 # Use rev_timestamp index
320
                    ) a,
321
                    (
322
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
323
                        #   and the lowest rev_id may not be the first revision.
324
                        SELECT actor_name AS author,
325
                               actor_user AS author_user_id,
326
                               rev_timestamp AS created_at,
327
                               rev_id AS created_rev_id
328
                        FROM $revTable
329
                        JOIN $actorTable ON actor_id = rev_actor
330
                        WHERE rev_page = :pageid
331
                        ORDER BY rev_timestamp ASC
332
                        LIMIT 1
333
                    ) b,
334
                    (
335
                        SELECT rev_timestamp AS modified_at,
336
                               rev_id AS modified_rev_id
337
                        FROM $revTable
338
                        JOIN $pageTable ON page_id = rev_page
339
                        WHERE rev_page = :pageid
340
                        AND rev_id = page_latest
341
                    ) c
342
                )";
343
        $params = ['pageid' => $page->getId()];
344
345
        // Get current time so we can compare timestamps
346
        // and decide whether or to cache the result.
347
        $time1 = time();
348
349
        /**
350
         * This query can sometimes take too long to run for pages with tens of thousands
351
         * of revisions. This query is used by the ArticleInfo gadget, which shows basic
352
         * data in real-time, so if it takes too long than the user probably didn't even
353
         * wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery,
354
         * which will set the max_statement_time to 60 seconds.
355
         */
356
        $result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative();
357
358
        $time2 = time();
359
360
        // If it took over 5 seconds, cache the result for 20 minutes.
361
        if ($time2 - $time1 > 5) {
362
            $this->setCache($cacheKey, $result, 'PT20M');
363
        }
364
365
        return $result ?? false;
366
    }
367
368
    /**
369
     * Get counts of (semi-)automated tools that were used to edit the page.
370
     * @param Page $page
371
     * @param $start
372
     * @param $end
373
     * @return array
374
     */
375
    public function getAutoEditsCounts(Page $page, $start, $end): array
376
    {
377
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
378
        if ($this->cache->hasItem($cacheKey)) {
379
            return $this->cache->getItem($cacheKey)->get();
380
        }
381
382
        $project = $page->getProject();
383
        $tools = $this->getTools($project);
384
        $queries = [];
385
        $revisionTable = $project->getTableName('revision', '');
386
        $pageTable = $project->getTableName('page');
387
        $pageJoin = "LEFT JOIN $pageTable ON rev_page = page_id";
388
        $revDateConditions = $this->getDateConditions($start, $end);
389
        $conn = $this->getProjectsConnection($project);
390
391
        foreach ($tools as $toolName => $values) {
392
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
393
            $toolName = $conn->quote($toolName, PDO::PARAM_STR);
394
395
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
396
            // configuration, but no local tag exists on the said project.
397
            if ('' === $condTool) {
398
                continue;
399
            }
400
401
            $queries[] .= "
402
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
403
                FROM $revisionTable
404
                $pageJoin
405
                $commentJoin
406
                $tagJoin
407
                WHERE $condTool
408
                    AND rev_page = :pageId
409
                $revDateConditions";
410
        }
411
412
        $sql = implode(' UNION ', $queries);
413
        $resultQuery = $this->executeProjectsQuery($project, $sql, [
414
            'pageId' => $page->getId(),
415
        ]);
416
417
        $results = [];
418
419
        while ($row = $resultQuery->fetchAssociative()) {
420
            // Only track tools that they've used at least once
421
            $tool = $row['toolname'];
422
            if ($row['count'] > 0) {
423
                $results[$tool] = [
424
                    'link' => $tools[$tool]['link'],
425
                    'label' => $tools[$tool]['label'] ?? $tool,
426
                    'count' => $row['count'],
427
                ];
428
            }
429
        }
430
431
        // Sort the array by count
432
        uasort($results, function ($a, $b) {
433
            return $b['count'] - $a['count'];
434
        });
435
436
        return $this->setCache($cacheKey, $results);
437
    }
438
}
439