Passed
Push — swagger-docs ( 6837d7...7bc867 )
by MusikAnimal
11:22
created

ArticleInfoRepository::getAutoEditsCounts()   B

Complexity

Conditions 6
Paths 10

Size

Total Lines 62
Code Lines 38

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 38
nc 10
nop 3
dl 0
loc 62
rs 8.6897
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Helper\AutomatedEditsHelper;
8
use App\Model\Edit;
9
use App\Model\Page;
10
use Doctrine\DBAL\Driver\ResultStatement;
11
use Doctrine\Persistence\ManagerRegistry;
12
use GuzzleHttp\Client;
13
use PDO;
14
use Psr\Cache\CacheItemPoolInterface;
15
use Psr\Log\LoggerInterface;
16
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
17
use Symfony\Component\HttpFoundation\RequestStack;
18
19
/**
20
 * ArticleInfoRepository is responsible for retrieving data about a single
21
 * article on a given wiki.
22
 * @codeCoverageIgnore
23
 */
24
class ArticleInfoRepository extends AutoEditsRepository
25
{
26
    protected EditRepository $editRepo;
27
    protected UserRepository $userRepo;
28
29
    /** @var int Maximum number of revisions to process, as configured via APP_MAX_PAGE_REVISIONS */
30
    protected int $maxPageRevisions;
31
32
    /**
33
     * @param ManagerRegistry $managerRegistry
34
     * @param CacheItemPoolInterface $cache
35
     * @param Client $guzzle
36
     * @param LoggerInterface $logger
37
     * @param ParameterBagInterface $parameterBag
38
     * @param bool $isWMF
39
     * @param int $queryTimeout
40
     * @param EditRepository $editRepo
41
     * @param UserRepository $userRepo
42
     */
43
    public function __construct(
44
        ManagerRegistry $managerRegistry,
45
        CacheItemPoolInterface $cache,
46
        Client $guzzle,
47
        LoggerInterface $logger,
48
        ParameterBagInterface $parameterBag,
49
        bool $isWMF,
50
        int $queryTimeout,
51
        EditRepository $editRepo,
52
        UserRepository $userRepo,
53
        ProjectRepository $projectRepo,
54
        AutomatedEditsHelper $autoEditsHelper,
55
        RequestStack $requestStack
56
    ) {
57
        $this->editRepo = $editRepo;
58
        $this->userRepo = $userRepo;
59
        parent::__construct(
60
            $managerRegistry,
61
            $cache,
62
            $guzzle,
63
            $logger,
64
            $parameterBag,
65
            $isWMF,
66
            $queryTimeout,
67
            $projectRepo,
68
            $autoEditsHelper,
69
            $requestStack
70
        );
71
    }
72
73
    /**
74
     * Get the performance maximum on the number of revisions to process.
75
     * @return int
76
     */
77
    public function getMaxPageRevisions(): int
78
    {
79
        if (!isset($this->maxPageRevisions)) {
80
            $this->maxPageRevisions = (int)$this->parameterBag->get('app.max_page_revisions');
81
        }
82
        return $this->maxPageRevisions;
83
    }
84
85
    /**
86
     * Factory to instantiate a new Edit for the given revision.
87
     * @param Page $page
88
     * @param array $revision
89
     * @return Edit
90
     */
91
    public function getEdit(Page $page, array $revision): Edit
92
    {
93
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
94
    }
95
96
    /**
97
     * Get the number of edits made to the page by bots or former bots.
98
     * @param Page $page
99
     * @param false|int $start
100
     * @param false|int $end
101
     * @param ?int $limit
102
     * @param bool $count Return a count rather than the full set of rows.
103
     * @return ResultStatement resolving with keys 'count', 'username' and 'current'.
104
     */
105
    public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement
106
    {
107
        $project = $page->getProject();
108
        $revTable = $project->getTableName('revision');
109
        $userGroupsTable = $project->getTableName('user_groups');
110
        $userFormerGroupsTable = $project->getTableName('user_former_groups');
111
        $actorTable = $project->getTableName('actor', 'revision');
112
113
        $datesConditions = $this->getDateConditions($start, $end);
114
115
        if ($count) {
116
            $actorSelect = '';
117
            $groupBy = '';
118
        } else {
119
            $actorSelect = 'actor_name AS username, ';
120
            $groupBy = 'GROUP BY actor_user';
121
        }
122
123
        $limitClause = '';
124
        if (null !== $limit) {
125
            $limitClause = "LIMIT $limit";
126
        }
127
128
        $sql = "SELECT COUNT(DISTINCT rev_id) AS `count`, $actorSelect '0' AS `current`
129
                FROM (
130
                    SELECT rev_id, rev_actor, rev_timestamp
131
                    FROM $revTable
132
                    WHERE rev_page = :pageId
133
                    ORDER BY rev_timestamp DESC
134
                    $limitClause
135
                ) a
136
                JOIN $actorTable ON actor_id = rev_actor
137
                LEFT JOIN $userFormerGroupsTable ON actor_user = ufg_user
138
                WHERE ufg_group = 'bot' $datesConditions
139
                $groupBy
140
                UNION
141
                SELECT COUNT(DISTINCT rev_id) AS count, $actorSelect '1' AS current
142
                FROM (
143
                    SELECT rev_id, rev_actor, rev_timestamp
144
                    FROM $revTable
145
                    WHERE rev_page = :pageId
146
                    ORDER BY rev_timestamp DESC
147
                    $limitClause
148
                ) a
149
                JOIN $actorTable ON actor_id = rev_actor
150
                LEFT JOIN $userGroupsTable ON actor_user = ug_user
151
                WHERE ug_group = 'bot' $datesConditions
152
                $groupBy";
153
154
        return $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()]);
155
    }
156
157
    /**
158
     * Get prior deletions, page moves, and protections to the page.
159
     * @param Page $page
160
     * @param false|int $start
161
     * @param false|int $end
162
     * @return string[] each entry with keys 'log_action', 'log_type' and 'timestamp'.
163
     */
164
    public function getLogEvents(Page $page, $start, $end): array
165
    {
166
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
167
        if ($this->cache->hasItem($cacheKey)) {
168
            return $this->cache->getItem($cacheKey)->get();
169
        }
170
        $loggingTable = $page->getProject()->getTableName('logging', 'logindex');
171
172
        $datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp');
173
174
        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
175
                FROM $loggingTable
176
                WHERE log_namespace = '" . $page->getNamespace() . "'
177
                AND log_title = :title AND log_timestamp > 1 $datesConditions
178
                AND log_type IN ('delete', 'move', 'protect', 'stable')";
179
        $title = str_replace(' ', '_', $page->getTitle());
180
181
        $result = $this->executeProjectsQuery($page->getProject(), $sql, ['title' => $title])
182
            ->fetchAllAssociative();
183
        return $this->setCache($cacheKey, $result);
184
    }
185
186
    /**
187
     * Get the number of categories, templates, and files that are on the page.
188
     * @param Page $page
189
     * @return array With keys 'categories', 'templates' and 'files'.
190
     */
191
    public function getTransclusionData(Page $page): array
192
    {
193
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
194
        if ($this->cache->hasItem($cacheKey)) {
195
            return $this->cache->getItem($cacheKey)->get();
196
        }
197
198
        $categorylinksTable = $page->getProject()->getTableName('categorylinks');
199
        $templatelinksTable = $page->getProject()->getTableName('templatelinks');
200
        $imagelinksTable = $page->getProject()->getTableName('imagelinks');
201
        $sql = "(
202
                    SELECT 'categories' AS `key`, COUNT(*) AS val
203
                    FROM $categorylinksTable
204
                    WHERE cl_from = :pageId
205
                ) UNION (
206
                    SELECT 'templates' AS `key`, COUNT(*) AS val
207
                    FROM $templatelinksTable
208
                    WHERE tl_from = :pageId
209
                ) UNION (
210
                    SELECT 'files' AS `key`, COUNT(*) AS val
211
                    FROM $imagelinksTable
212
                    WHERE il_from = :pageId
213
                )";
214
        $resultQuery = $this->executeProjectsQuery($page->getProject(), $sql, ['pageId' => $page->getId()]);
215
        $transclusionCounts = [];
216
217
        while ($result = $resultQuery->fetchAssociative()) {
218
            $transclusionCounts[$result['key']] = (int)$result['val'];
219
        }
220
221
        return $this->setCache($cacheKey, $transclusionCounts);
222
    }
223
224
    /**
225
     * Get the top editors to the page by edit count.
226
     * @param Page $page
227
     * @param false|int $start
228
     * @param false|int $end
229
     * @param int $limit
230
     * @param bool $noBots
231
     * @return array
232
     */
233
    public function getTopEditorsByEditCount(
234
        Page $page,
235
        $start = false,
236
        $end = false,
237
        int $limit = 20,
238
        bool $noBots = false
239
    ): array {
240
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
241
        if ($this->cache->hasItem($cacheKey)) {
242
            return $this->cache->getItem($cacheKey)->get();
243
        }
244
245
        $project = $page->getProject();
246
        // Faster to use revision instead of revision_userindex in this case.
247
        $revTable = $project->getTableName('revision', '');
248
        $actorTable = $project->getTableName('actor');
249
250
        $dateConditions = $this->getDateConditions($start, $end);
251
252
        $sql = "SELECT actor_name AS username,
253
                    COUNT(rev_id) AS count,
254
                    SUM(rev_minor_edit) AS minor,
255
                    MIN(rev_timestamp) AS first_timestamp,
256
                    MIN(rev_id) AS first_revid,
257
                    MAX(rev_timestamp) AS latest_timestamp,
258
                    MAX(rev_id) AS latest_revid
259
                FROM $revTable
260
                JOIN $actorTable ON rev_actor = actor_id
261
                WHERE rev_page = :pageId $dateConditions";
262
263
        if ($noBots) {
264
            $userGroupsTable = $project->getTableName('user_groups');
265
            $sql .= "AND NOT EXISTS (
266
                         SELECT 1
267
                         FROM $userGroupsTable
268
                         WHERE ug_user = actor_user
269
                         AND ug_group = 'bot'
270
                     )";
271
        }
272
273
        $sql .= "GROUP BY actor_id
274
                 ORDER BY count DESC
275
                 LIMIT $limit";
276
277
        $result = $this->executeProjectsQuery($project, $sql, [
278
            'pageId' => $page->getId(),
279
        ])->fetchAllAssociative();
280
281
        return $this->setCache($cacheKey, $result);
282
    }
283
284
    /**
285
     * Get various basic info used in the API, including the number of revisions, unique authors, initial author
286
     * and edit count of the initial author. This is combined into one query for better performance. Caching is only
287
     * applied if it took considerable time to process, because using the gadget, this will get hit for a different page
288
     * constantly, where the likelihood of cache benefiting us is slim.
289
     * @param Page $page The page.
290
     * @return string[]|false false if the page was not found.
291
     */
292
    public function getBasicEditingInfo(Page $page)
293
    {
294
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
295
        if ($this->cache->hasItem($cacheKey)) {
296
            return $this->cache->getItem($cacheKey)->get();
297
        }
298
299
        $project = $page->getProject();
300
        $revTable = $project->getTableName('revision');
301
        $userTable = $project->getTableName('user');
302
        $pageTable = $project->getTableName('page');
303
        $actorTable = $project->getTableName('actor');
304
305
        $sql = "SELECT *, (
306
                    SELECT user_editcount
307
                    FROM $userTable
308
                    WHERE user_id = author_user_id
309
                ) AS author_editcount
310
                FROM (
311
                    (
312
                        SELECT COUNT(rev_id) AS num_edits,
313
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
314
                            SUM(actor_user IS NULL) AS ip_edits,
315
                            SUM(rev_minor_edit) AS minor_edits
316
                        FROM $revTable
317
                        JOIN $actorTable ON actor_id = rev_actor
318
                        WHERE rev_page = :pageid
319
                        AND rev_timestamp > 0 # Use rev_timestamp index
320
                    ) a,
321
                    (
322
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
323
                        #   and the lowest rev_id may not be the first revision.
324
                        SELECT actor_name AS author,
325
                               actor_user AS author_user_id,
326
                               rev_timestamp AS created_at,
327
                               rev_id AS created_rev_id
328
                        FROM $revTable
329
                        JOIN $actorTable ON actor_id = rev_actor
330
                        WHERE rev_page = :pageid
331
                        ORDER BY rev_timestamp ASC
332
                        LIMIT 1
333
                    ) b,
334
                    (
335
                        SELECT rev_timestamp AS modified_at,
336
                               rev_id AS modified_rev_id
337
                        FROM $revTable
338
                        JOIN $pageTable ON page_id = rev_page
339
                        WHERE rev_page = :pageid
340
                        AND rev_id = page_latest
341
                    ) c
342
                )";
343
        $params = ['pageid' => $page->getId()];
344
345
        // Get current time so we can compare timestamps
346
        // and decide whether or to cache the result.
347
        $time1 = time();
348
349
        /**
350
         * This query can sometimes take too long to run for pages with tens of thousands
351
         * of revisions. This query is used by the ArticleInfo gadget, which shows basic
352
         * data in real-time, so if it takes too long than the user probably didn't even
353
         * wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery,
354
         * which will set the max_statement_time to 60 seconds.
355
         */
356
        $result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative();
357
358
        $time2 = time();
359
360
        // If it took over 5 seconds, cache the result for 20 minutes.
361
        if ($time2 - $time1 > 5) {
362
            $this->setCache($cacheKey, $result, 'PT20M');
363
        }
364
365
        return $result ?? false;
366
    }
367
368
    /**
369
     * Get counts of (semi-)automated tools that were used to edit the page.
370
     * @param Page $page
371
     * @param $start
372
     * @param $end
373
     * @return array
374
     */
375
    public function getAutoEditsCounts(Page $page, $start, $end): array
376
    {
377
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
378
        if ($this->cache->hasItem($cacheKey)) {
379
            return $this->cache->getItem($cacheKey)->get();
380
        }
381
382
        $project = $page->getProject();
383
        $tools = $this->getTools($project);
384
        $queries = [];
385
        $revisionTable = $project->getTableName('revision', '');
386
        $pageTable = $project->getTableName('page');
387
        $pageJoin = "LEFT JOIN $pageTable ON rev_page = page_id";
388
        $revDateConditions = $this->getDateConditions($start, $end);
389
        $conn = $this->getProjectsConnection($project);
390
391
        foreach ($tools as $toolName => $values) {
392
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
393
            $toolName = $conn->quote($toolName, PDO::PARAM_STR);
394
395
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
396
            // configuration, but no local tag exists on the said project.
397
            if ('' === $condTool) {
398
                continue;
399
            }
400
401
            $queries[] .= "
402
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
403
                FROM $revisionTable
404
                $pageJoin
405
                $commentJoin
406
                $tagJoin
407
                WHERE $condTool
408
                    AND rev_page = :pageId
409
                $revDateConditions";
410
        }
411
412
        $sql = implode(' UNION ', $queries);
413
        $resultQuery = $this->executeProjectsQuery($project, $sql, [
414
            'pageId' => $page->getId(),
415
        ]);
416
417
        $results = [];
418
419
        while ($row = $resultQuery->fetchAssociative()) {
420
            // Only track tools that they've used at least once
421
            $tool = $row['toolname'];
422
            if ($row['count'] > 0) {
423
                $results[$tool] = [
424
                    'link' => $tools[$tool]['link'],
425
                    'label' => $tools[$tool]['label'] ?? $tool,
426
                    'count' => $row['count'],
427
                ];
428
            }
429
        }
430
431
        // Sort the array by count
432
        uasort($results, function ($a, $b) {
433
            return $b['count'] - $a['count'];
434
        });
435
436
        return $this->setCache($cacheKey, $results);
437
    }
438
}
439