Passed
Push — master ( b5ac54...2d882d )
by MusikAnimal
54s queued 12s
created

EventWikiRepository   C

Complexity

Total Complexity 57

Size/Duplication

Total Lines 680
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 57
eloc 286
dl 0
loc 680
rs 5.04
c 0
b 0
f 0

17 Methods

Rating   Name   Duplication   Size   Complexity  
A getAvailableWikis() 0 20 1
A wikifyInternalLinks() 0 19 2
A getDbNameFromDomain() 0 16 2
A wikifyString() 0 20 3
A getWikiFamilyName() 0 9 1
A getDomainFromEventWikiInput() 0 31 5
C getPageIds() 0 59 15
A getEntityClass() 0 3 1
A getPageviews() 0 37 6
A getPageTitles() 0 11 3
A getBytesChanged() 0 49 2
A getUsersFromPageIDs() 0 15 1
A getPagesImprovedData() 0 39 3
A getSinglePageImprovedData() 0 78 4
A getPagesCreatedData() 0 42 3
A getSinglePageCreatedData() 0 65 4
A getPageviewsRepository() 0 5 1

How to fix   Complexity   

Complex Class

Complex classes like EventWikiRepository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use EventWikiRepository, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * This file contains only the EventWikiRepository class.
4
 */
5
6
declare(strict_types=1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Event;
11
use AppBundle\Model\EventWiki;
12
use DateTime;
13
use Doctrine\DBAL\Connection;
14
use Exception;
15
16
/**
17
 * This class supplies and fetches data for the EventWiki class.
18
 * @codeCoverageIgnore
19
 */
20
class EventWikiRepository extends Repository
21
{
22
    /** Max number of pages to be returned when generating page IDs. */
23
    public const MAX_PAGES = 50000;
24
25
    /**
     * Name of the entity class this repository is associated with.
     * Implements Repository::getEntityClass
     * @return string The class name of EventWiki.
     */
    public function getEntityClass(): string
    {
        return EventWiki::class;
    }
34
35
    /**
     * Get the wiki's domain name without the .org given a database name or domain.
     *
     * A value starting with '*.' is treated as a wiki family and validated through
     * getWikiFamilyName(). Any other value is looked up in the meta `wiki` table, matching
     * either the database name (a trailing '_p' is ignored) or the URL, with or without '.org'.
     * @param string $value Database name, (partial) domain, or '*.' followed by a family name.
     * @return string|null Null if no wiki was found.
     */
    public function getDomainFromEventWikiInput(string $value): ?string
    {
        if ('*.' === substr($value, 0, 2)) {
            $ret = $this->getWikiFamilyName(substr($value, 2));
            return null !== $ret ? '*.'.$ret : null;
        }

        // Strip '_p' only when it is the suffix; removing it anywhere in the value would mangle the name.
        $project = (string)preg_replace('/_p\z/', '', $value);

        $conn = $this->getMetaConnection();
        $rqb = $conn->createQueryBuilder();
        $rqb->select(['dbname', 'url'])
            ->from('wiki')
            ->where($rqb->expr()->eq('dbname', ':project'))
            ->orWhere($rqb->expr()->like('url', ':projectUrl'))
            ->orWhere($rqb->expr()->like('url', ':projectUrl2'))
            ->setParameter('project', $project)
            ->setParameter('projectUrl', "https://$value")
            ->setParameter('projectUrl2', "https://$value.org");
        $ret = $this->executeQueryBuilder($rqb)->fetch();

        // No matches found.
        if (!$ret) {
            return null;
        }

        // Extract and return just the domain name without '.org' suffix.
        $matches = [];
        preg_match('/^https?\:\/\/(.*)\.org$/', $ret['url'], $matches);

        return $matches[1] ?? null;
    }
73
74
    /**
     * This effectively validates the given name as a wiki family
     * (wikipedia, wiktionary, etc). Null is returned if invalid.
     * @param string $value The wiki family name.
     * @return string|null The wiki family name, or null if invalid.
     */
    public function getWikiFamilyName(string $value): ?string
    {
        $rqb = $this->getMetaConnection()->createQueryBuilder();
        $rqb->select(['family'])
            ->from('wiki')
            ->where($rqb->expr()->eq('family', ':family'))
            ->setParameter('family', $value);

        // fetch() gives a falsy value when nothing matched; indexing that directly
        // raises a notice/warning on PHP 7.4+, so check for a row first.
        $row = $this->executeQueryBuilder($rqb)->fetch();

        return is_array($row) ? ($row['family'] ?? null) : null;
    }
90
91
    /**
     * Look up the database name, with '_p' appended, for the given (partial) domain name.
     * @param string $domain The domain name, without trailing '.org'.
     * @return string The database name.
     * @throws Exception If no wiki with a matching URL was found.
     */
    public function getDbNameFromDomain(string $domain): string
    {
        $rqb = $this->getMetaConnection()->createQueryBuilder();
        $rqb->select(["CONCAT(dbname, '_p') AS dbname"])
            ->from('wiki')
            ->where('url = :projectUrl')
            ->setParameter('projectUrl', "https://$domain.org");

        $row = $this->executeQueryBuilder($rqb)->fetch();
        if (isset($row['dbname'])) {
            return $row['dbname'];
        }

        throw new Exception("Unable to determine database name for domain '$domain'.");
    }
113
114
    /**
     * Public static method to convert wikitext to HTML, can be used on any arbitrary string.
     * Does NOT support section links unless you specify a page.
     *
     * The input is entity-decoded and then HTML-escaped (quotes left as-is). A leading section
     * marker such as "/* Section *​/" becomes an arrow link to that section when $pageTitle is
     * given, and internal links are converted via wikifyInternalLinks().
     * @param string $wikitext
     * @param string $domain The project domain such as en.wikipedia
     * @param string|null $pageTitle The title of the page, including namespace.
     * @return string
     * @static
     */
    public static function wikifyString(string $wikitext, string $domain, ?string $pageTitle = null): string
    {
        $wikitext = htmlspecialchars(html_entity_decode($wikitext), ENT_NOQUOTES);
        $sectionMatch = [];
        $isSection = preg_match("/^\/\* (.*?) \*\//", $wikitext, $sectionMatch);

        if ($isSection && null !== $pageTitle) {
            // The title goes into a single-quoted attribute, so quotes must be escaped as well.
            $pageUrl = "https://$domain.org/wiki/".htmlspecialchars(
                ucfirst(str_replace(' ', '_', $pageTitle)),
                ENT_QUOTES
            );
            $sectionTitle = $sectionMatch[1];

            // The section title was already escaped above (except quotes), so existing
            // entities must not be encoded a second time.
            // Must have underscores for the link to properly go to the section.
            $sectionTitleLink = htmlspecialchars(str_replace(' ', '_', $sectionTitle), ENT_QUOTES, 'UTF-8', false);
            $sectionLabel = htmlspecialchars($sectionTitle, ENT_QUOTES, 'UTF-8', false);

            $sectionWikitext = "<a target='_blank' href='$pageUrl#$sectionTitleLink'>&rarr;</a>".
                "<em class='text-muted'>$sectionLabel:</em>";
            $wikitext = str_replace($sectionMatch[0], $sectionWikitext, $wikitext);
        }

        return self::wikifyInternalLinks($wikitext, $domain);
    }
144
145
    /**
     * Converts internal links in wikitext to HTML.
     *
     * Each [[Target]] or [[Target|Label]] (optionally with a leading colon) becomes an anchor
     * pointing at the page on the given project. The text is expected to be HTML-escaped already,
     * as wikifyString() does, so existing entities are not encoded a second time; quotes are
     * escaped because the URL is placed in a single-quoted attribute.
     * @param string $wikitext
     * @param string $domain The project domain such as en.wikipedia
     * @return string Updated wikitext.
     * @static
     */
    private static function wikifyInternalLinks(string $wikitext, string $domain): string
    {
        $pagePath = "https://$domain.org/wiki/";

        // A single pass over the original text: generated markup is never scanned again for links.
        $converted = preg_replace_callback(
            "/\[\[:?(.*?)\]\]/",
            static function (array $linkMatch) use ($pagePath): string {
                $wikiLinkParts = explode('|', $linkMatch[1]);

                // Use normalized page title (underscored, capitalized).
                $wikiLinkPath = htmlspecialchars(
                    ucfirst(str_replace(' ', '_', $wikiLinkParts[0])),
                    ENT_QUOTES,
                    'UTF-8',
                    false
                );
                $wikiLinkText = htmlspecialchars(
                    $wikiLinkParts[1] ?? $wikiLinkParts[0],
                    ENT_QUOTES,
                    'UTF-8',
                    false
                );

                return "<a target='_blank' href='$pagePath$wikiLinkPath'>$wikiLinkText</a>";
            },
            $wikitext
        );

        // preg_replace_callback() returns null on a PCRE error; fall back to the untouched text.
        return $converted ?? $wikitext;
    }
172
173
    /**
     * List the open wikis in the meta `wiki` table whose URL matches EventWiki::VALID_WIKI_PATTERN.
     * @return string[] With domain as the keys, database name as the values.
     */
    public function getAvailableWikis(): array
    {
        // Drop the surrounding forward slashes of the PHP pattern...
        $pattern = trim(EventWiki::VALID_WIKI_PATTERN, '/');

        /** @var string $validWikiRegex ...and double the backslashes for use inside the SQL string. */
        $validWikiRegex = str_replace('\\', '\\\\', $pattern);

        $rqb = $this->getMetaConnection()->createQueryBuilder();
        $rqb->select([
            "REGEXP_REPLACE(url, 'https?:\/\/(.*)\.org', '\\\\1')",
            "CONCAT(dbname, '_p')",
        ])
            ->from('wiki')
            ->where('is_closed = 0')
            ->andWhere("url RLIKE '$validWikiRegex'");

        $stmt = $this->executeQueryBuilder($rqb);

        // First selected column becomes the key, second the value.
        return $stmt->fetchAll(\PDO::FETCH_KEY_PAIR);
    }
198
199
    /**
     * Collect the distinct IDs of pages edited/created within the given period on the given wiki.
     * If you need to do this for pages within specific categories, without participants, use
     * EventCategoryRepository::getPagesInCategories().
     * @param string $dbName
     * @param DateTime $start
     * @param DateTime $end
     * @param int[] $actors Only revisions by these actor IDs are considered, when non-empty.
     * @param string[] $categoryTitles Only pages in these categories are considered, when non-empty.
     * @param string $type 'created', 'edited' or 'files'. Any other value returns pages regardless of
     *   whether the revision has a parent. To get pages improved, first get edited then use array_diff
     *   against created.
     * @return int[] At most self::MAX_PAGES IDs. Empty when neither actors nor categories are given.
     */
    public function getPageIds(
        string $dbName,
        DateTime $start,
        DateTime $end,
        array $actors = [],
        array $categoryTitles = [],
        string $type = ''
    ): array {
        $hasActors = !empty($actors);
        $hasCategories = !empty($categoryTitles);

        // Local (non-Commons) file uploads ignore categories and require participants.
        $isLocalFiles = 'files' === $type && 'commonswiki_p' !== $dbName;

        if (!$hasActors && (!$hasCategories || $isLocalFiles)) {
            return [];
        }

        // Normal `revision` table is faster if you're not filtering by user.
        $revisionTable = $this->getTableName('revision', $hasActors ? 'userindex' : '');
        $revisions = "$dbName.$revisionTable";

        $rqb = $this->getReplicaConnection()->createQueryBuilder();
        $rqb->select('DISTINCT rev_page')
            ->from($revisions)
            ->join($revisions, "$dbName.page", 'page_rev', 'page_id = rev_page');

        if ($hasCategories && !$isLocalFiles) {
            $rqb->join($revisions, "$dbName.categorylinks", 'category_rev', 'cl_from = rev_page')
                ->where('cl_to IN (:categoryTitles)')
                ->setParameter('categoryTitles', $categoryTitles, Connection::PARAM_STR_ARRAY);
        }

        // File namespace for uploads, main namespace otherwise.
        $nsId = 'files' === $type ? 6 : 0;
        $rqb->andWhere("page_namespace = $nsId")
            ->andWhere('page_is_redirect = 0')
            ->andWhere('rev_timestamp BETWEEN :start AND :end')
            ->setParameter('start', $start->format('YmdHis'))
            ->setParameter('end', $end->format('YmdHis'));

        if ($hasActors) {
            $rqb->andWhere($rqb->expr()->in('rev_actor', ':actors'))
                ->setParameter('actors', $actors, Connection::PARAM_INT_ARRAY);
        }

        // If only pages created, edited or files are being requested, limit based on the presence of a
        // parent revision: creations and uploads have none, edits do.
        $parentOperators = ['created' => '=', 'edited' => '!=', 'files' => '='];
        if (isset($parentOperators[$type])) {
            $rqb->andWhere("rev_parent_id {$parentOperators[$type]} 0");
        }

        $rqb->setMaxResults(self::MAX_PAGES);

        $result = $this->executeQueryBuilder($rqb)->fetchAll(\PDO::FETCH_COLUMN);
        if (!$result) {
            return $result;
        }

        return array_map('intval', $result);
    }
271
272
    /**
     * Get the total pageviews count for a set of pages, from a given date until yesterday. Optionally reduce
     * to an average of the last N days, where N is Event::AVAILABLE_METRICS['pages-improved-pageviews-avg'].
     * @param string $dbName
     * @param string $domain
     * @param DateTime $start
     * @param int[] $pageIds
     * @param bool $getDailyAverage
     * @return int
     */
    public function getPageviews(
        string $dbName,
        string $domain,
        DateTime $start,
        array $pageIds,
        bool $getDailyAverage = false
    ): int {
        if (0 === count($pageIds)) {
            return 0;
        }

        $pageviewsRepo = $this->getPageviewsRepository();
        $recentDayCount = Event::AVAILABLE_METRICS['pages-improved-pageviews-avg'];
        $end = new DateTime('yesterday midnight');

        // Fetches the requested metric for one chunk of page titles.
        $fetchChunk = static function (array $titles) use (
            $pageviewsRepo,
            $domain,
            $start,
            $end,
            $getDailyAverage,
            $recentDayCount
        ) {
            return $getDailyAverage
                ? $pageviewsRepo->getAvgPageviews($domain, $titles, $recentDayCount)
                : $pageviewsRepo->getPageviews($domain, $titles, $start, $end);
        };

        $stmt = $this->getPageTitles($dbName, $pageIds, true);
        $pageviews = 0;
        $pageTitles = [];

        // PageviewsRepository will fetch pageviews asynchronously, so we get the page titles in chunks
        // and call the appropriate method. We also don't want to put too many titles in memory at one time.
        while ($result = $stmt->fetch()) {
            $pageTitles[] = $result['page_title'];

            if (100 === count($pageTitles)) {
                $pageviews += $fetchChunk($pageTitles);
                $pageTitles = [];
            }
        }

        // Flush the remainder. This cannot rely on the row count matching count($pageIds): an ID without
        // a row in the page table would otherwise cause the final partial chunk to be dropped.
        if (count($pageTitles) > 0) {
            $pageviews += $fetchChunk($pageTitles);
        }

        return $pageviews;
    }
320
321
    /**
     * Get the page titles of the pages with the given IDs.
     * @param string $dbName
     * @param int[] $pageIds
     * @param bool $stmt Whether to get only the statement, so that the calling method can use fetch().
     * @param bool $includePageIds Whether to include page IDs in the result.
     * @return mixed[]|\Doctrine\DBAL\Driver\ResultStatement The statement if $stmt is true. Otherwise a
     *   list of titles, or a list of rows with 'page_id' and 'page_title' if $includePageIds is true.
     */
    public function getPageTitles(string $dbName, array $pageIds, bool $stmt = false, bool $includePageIds = false)
    {
        $rqb = $this->getReplicaConnection()->createQueryBuilder();
        $select = $includePageIds ? ['page_id', 'page_title'] : 'page_title';
        $rqb->select($select)
            ->from("$dbName.page")
            ->where($rqb->expr()->in('page_id', ':ids'))
            ->setParameter('ids', $pageIds, Connection::PARAM_INT_ARRAY);
        $result = $this->executeQueryBuilder($rqb);

        if ($stmt) {
            return $result;
        }

        // FETCH_COLUMN keeps only the first selected column, which would discard the titles
        // whenever the page IDs are selected too.
        return $result->fetchAll($includePageIds ? \PDO::FETCH_ASSOC : \PDO::FETCH_COLUMN);
    }
341
342
    /**
     * Calculates the number of bytes changed during an event.
     *
     * For every given page, the length of the parent of the first revision in the event period
     * is subtracted from the length of the last revision in that period; the differences are summed.
     * @param Event $event
     * @param string $dbName
     * @param int[] $pageIds
     * @param int[] $actors Only revisions by these actor IDs are considered, when non-empty.
     * @return int
     */
    public function getBytesChanged(Event $event, string $dbName, array $pageIds, array $actors): int
    {
        $revisionTable = $this->getTableName('revision');
        $pageTable = $this->getTableName('page');
        $actorCond = empty($actors) ? '' : 'AND cur.rev_actor IN (:actors)';

        // Length of the last revision within the period.
        $after = "SELECT COALESCE(rev_len, 0)
            FROM $dbName.$revisionTable cur
            WHERE rev_page=page_id
              AND rev_timestamp BETWEEN :start AND :end
              {$actorCond}
            ORDER BY rev_timestamp DESC
            LIMIT 1";

        // Length of the parent of the first revision within the period (0 if there is none).
        $before = "SELECT COALESCE(prev.rev_len, 0)
            FROM $dbName.$revisionTable cur
                LEFT JOIN $dbName.$revisionTable prev ON cur.rev_parent_id=prev.rev_id
            WHERE cur.rev_page=page_id
              AND cur.rev_timestamp BETWEEN :start AND :end
              {$actorCond}
            ORDER BY cur.rev_timestamp ASC
            LIMIT 1";

        $outerSql = "SELECT SUM(after) - SUM(before_)
            FROM (
                SELECT ($after) after, ($before) before_
                    FROM $dbName.$pageTable
                    WHERE page_id IN (:pageIds)
                ) t1";

        $params = [
            'start' => $event->getStartUTC()->format('YmdHis'),
            'end' => $event->getEndUTC()->format('YmdHis'),
            'pageIds' => $pageIds,
            'actors' => $actors,
        ];
        $types = [
            'pageIds' => Connection::PARAM_INT_ARRAY,
            'actors' => Connection::PARAM_INT_ARRAY,
        ];

        $stmt = $this->executeReplicaQueryWithTypes($outerSql, $params, $types);

        return (int)$stmt->fetchColumn();
    }
401
402
    /**
     * Get the list of users participating in an event with no predefined user list.
     *
     * These are the distinct actor names, restricted to actors with a non-null actor_user,
     * behind revisions made to the given pages during the event.
     * @param string $dbName
     * @param int[] $pageIds
     * @param Event $event
     * @return string[]
     */
    public function getUsersFromPageIDs(string $dbName, array $pageIds, Event $event): array
    {
        $revisionTable = $this->getTableName('revision');
        $startTs = $event->getStartUTC()->format('YmdHis');
        $endTs = $event->getEndUTC()->format('YmdHis');

        $rqb = $this->getReplicaConnection()->createQueryBuilder();
        $rqb->select('DISTINCT(actor_name)')
            ->from("$dbName.$revisionTable", 'r')
            ->join('r', "$dbName.actor", 'a', 'a.actor_id = r.rev_actor')
            ->where('rev_page IN (:pageIds)')
            ->andWhere('actor_user IS NOT NULL')
            ->andWhere('rev_timestamp BETWEEN :start AND :end')
            ->setParameter('pageIds', $pageIds, Connection::PARAM_INT_ARRAY)
            ->setParameter('start', $startTs)
            ->setParameter('end', $endTs);

        $stmt = $this->executeQueryBuilder($rqb);

        return $stmt->fetchAll(\PDO::FETCH_COLUMN);
    }
426
427
    /**
     * Get data for a single page, to be included in the Pages Created report.
     *
     * The result is keyed by metric: 'creator', 'edits' (revisions up to $end), 'bytes' (length of
     * the latest revision up to $end) and 'links' (links to the page from non-redirect mainspace pages).
     * 'edits' and 'bytes' are restricted to the given usernames, when non-empty.
     * Results are cached for 10 minutes.
     * @param string $dbName
     * @param int $pageId
     * @param string $pageTitle
     * @param string[] $usernames
     * @param DateTime $end
     * @return string[]
     * @throws \Psr\Cache\InvalidArgumentException
     */
    public function getSinglePageCreatedData(
        string $dbName,
        int $pageId,
        string $pageTitle,
        array $usernames,
        DateTime $end
    ): array {
        // Use cache if it exists.
        $cacheKey = $this->getCacheKey(func_get_args(), 'pages_created_info');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $end = $end->format('YmdHis');
        $usernamesSql = empty($usernames) ? '' : 'AND rev_user_text IN (:usernames)';

        // Only use revision_userindex when filtering by user.
        $userRevisionTable = empty($usernames) ? 'revision' : 'revision_userindex';

        // The creator is the author of the earliest revision. Without an explicit ORDER BY,
        // LIMIT 1 could return any revision of the page.
        $sql = "SELECT `metric`, `value` FROM (
                    (
                        SELECT 'creator' AS `metric`, rev_user_text AS `value`
                        FROM $dbName.revision
                        WHERE rev_page = :pageId
                        ORDER BY rev_timestamp ASC
                        LIMIT 1
                    ) UNION (
                        SELECT 'edits' AS `metric`, COUNT(*) AS `value`
                        FROM $dbName.$userRevisionTable
                        WHERE rev_page = :pageId
                            AND rev_timestamp <= :end
                            $usernamesSql
                    ) UNION (
                        SELECT 'bytes' AS `metric`, rev_len AS `value`
                        FROM $dbName.$userRevisionTable
                        WHERE rev_page = :pageId
                            AND rev_timestamp <= :end
                            $usernamesSql
                        ORDER BY rev_timestamp DESC
                        LIMIT 1
                    ) UNION (
                        SELECT 'links' AS `metric`, COUNT(*) AS `value`
                        FROM $dbName.pagelinks
                        JOIN $dbName.page ON page_id = pl_from
                        WHERE pl_from_namespace = 0
                            AND pl_namespace = 0
                            AND pl_title = :pageTitle
                            AND page_is_redirect = 0
                    )
                ) t1";

        $ret = $this->executeReplicaQueryWithTypes(
            $sql,
            [
                'pageId' => $pageId,
                'pageTitle' => $pageTitle,
                'usernames' => $usernames,
                'end' => $end,
            ],
            [
                'usernames' => Connection::PARAM_STR_ARRAY,
            ]
        )->fetchAll(\PDO::FETCH_KEY_PAIR);

        // Cache for 10 minutes.
        return $this->setCache($cacheKey, $ret, 'PT10M');
    }
502
503
    /**
     * Build the rows of the Pages Created report for a single EventWiki.
     *
     * Each row combines the result of getSinglePageCreatedData() with the page title, the wiki
     * domain, the pageviews from the start of the event until yesterday, and the average pageviews.
     * @param EventWiki $wiki
     * @param string[] $usernames
     * @return mixed[] One entry per created page; empty for family wikis.
     */
    public function getPagesCreatedData(EventWiki $wiki, array $usernames): array
    {
        if ($wiki->isFamilyWiki()) {
            return [];
        }

        $domain = $wiki->getDomain();
        $dbName = $this->getDbNameFromDomain($domain);
        $pageviewsRepo = $this->getPageviewsRepository();
        $avgPageviewsOffset = Event::AVAILABLE_METRICS['pages-improved-pageviews-avg'];
        $pages = $this->getPageTitles($dbName, $wiki->getPagesCreated(), true, true);
        $eventStart = $wiki->getEvent()->getStartUTC();
        $eventEnd = $wiki->getEvent()->getEndUTC();
        $yesterday = new DateTime('yesterday midnight');
        $data = [];

        while ($page = $pages->fetch()) {
            $title = $page['page_title'];

            // FIXME: async?
            [$pageviews, $avgPageviews] = $pageviewsRepo->getPageviews(
                $domain,
                [$title],
                $eventStart,
                $yesterday,
                $avgPageviewsOffset
            );

            $pageInfo = $this->getSinglePageCreatedData($dbName, (int)$page['page_id'], $title, $usernames, $eventEnd);

            $extra = [
                'pageTitle' => $title,
                'wiki' => $domain,
                'pageviews' => (int)$pageviews,
                'avgPageviews' => (int)$avgPageviews,
            ];
            $data[] = array_merge($pageInfo, $extra);
        }

        return $data;
    }
552
553
    /**
     * Get data for a single page, to be included in the Pages Improved report.
     *
     * The result has 'edits' (revisions within the period), 'links' (links to the page from
     * non-redirect mainspace pages) and 'bytes' (length of the last revision in the period minus the
     * length of the parent of the first revision in the period). 'edits' and 'bytes' are restricted
     * to the given usernames, when non-empty. Results are cached for 10 minutes.
     * @param string $dbName
     * @param int $pageId
     * @param string $pageTitle
     * @param string[] $usernames
     * @param DateTime $start
     * @param DateTime $end
     * @return mixed[]
     * @throws \Psr\Cache\InvalidArgumentException
     */
    public function getSinglePageImprovedData(
        string $dbName,
        int $pageId,
        string $pageTitle,
        array $usernames,
        DateTime $start,
        DateTime $end
    ): array {
        // Use cache if it exists.
        $cacheKey = $this->getCacheKey(func_get_args(), 'pages_improved_info');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $start = $start->format('YmdHis');
        $end = $end->format('YmdHis');
        $usernamesSql = empty($usernames) ? '' : 'AND rev.rev_user_text IN (:usernames)';

        // Only use revision_userindex when filtering by user.
        $userRevisionTable = empty($usernames) ? 'revision' : 'revision_userindex';

        $sql = "SELECT `metric`, `value` FROM (
                    (
                        SELECT 'edits' AS `metric`, COUNT(*) AS `value`
                        FROM $dbName.$userRevisionTable rev
                        WHERE rev_page = :pageId
                            AND rev_timestamp BETWEEN :start AND :end
                            $usernamesSql
                    ) UNION (
                        SELECT 'start_bytes' AS `metric`, COALESCE(prev.rev_len, 0) AS `value`
                            FROM $dbName.$userRevisionTable rev
                                LEFT JOIN $dbName.$userRevisionTable prev ON rev.rev_parent_id=prev.rev_id
                            WHERE rev.rev_page=:pageId
                              AND rev.rev_timestamp BETWEEN :start AND :end
                              {$usernamesSql}
                            ORDER BY rev.rev_timestamp ASC
                            LIMIT 1
                    ) UNION (
                        SELECT 'end_bytes' AS `metric`, COALESCE(rev_len, 0) AS `value`
                            FROM $dbName.$userRevisionTable rev
                            WHERE rev_page=:pageId
                              AND rev_timestamp BETWEEN :start AND :end
                              {$usernamesSql}
                            ORDER BY rev_timestamp DESC
                            LIMIT 1
                    ) UNION (
                        SELECT 'links' AS `metric`, COUNT(*) AS `value`
                        FROM $dbName.pagelinks
                        JOIN $dbName.page ON page_id = pl_from
                        WHERE pl_from_namespace = 0
                            AND pl_namespace = 0
                            AND pl_title = :pageTitle
                            AND page_is_redirect = 0
                    )
                ) t1";

        $rows = $this->executeReplicaQueryWithTypes(
            $sql,
            [
                'pageId' => $pageId,
                'pageTitle' => $pageTitle,
                'usernames' => $usernames,
                'start' => $start,
                'end' => $end,
            ],
            [
                'usernames' => Connection::PARAM_STR_ARRAY,
            ]
        )->fetchAll(\PDO::FETCH_KEY_PAIR);

        // The two LIMIT 1 subqueries yield no row at all when no revision matches, so the
        // 'start_bytes' and 'end_bytes' keys may be missing; treat that as no change.
        $startBytes = (int)($rows['start_bytes'] ?? 0);
        $endBytes = (int)($rows['end_bytes'] ?? 0);

        $ret = [
            'edits' => $rows['edits'],
            'links' => $rows['links'],
            'bytes' => $endBytes - $startBytes,
        ];

        // Cache for 10 minutes.
        return $this->setCache($cacheKey, $ret, 'PT10M');
    }
643
644
    /**
     * Build the rows of the Pages Improved report for a single EventWiki.
     *
     * Each row combines the result of getSinglePageImprovedData() with the page title,
     * the wiki domain and the average pageviews.
     * @param EventWiki $wiki
     * @param string[] $usernames
     * @return mixed[] One entry per improved page; empty for family wikis.
     */
    public function getPagesImprovedData(EventWiki $wiki, array $usernames): array
    {
        if ($wiki->isFamilyWiki()) {
            return [];
        }

        $domain = $wiki->getDomain();
        $dbName = $this->getDbNameFromDomain($domain);
        $pageviewsRepo = $this->getPageviewsRepository();
        $avgPageviewsOffset = Event::AVAILABLE_METRICS['pages-improved-pageviews-avg'];
        $pages = $this->getPageTitles($dbName, $wiki->getPagesImproved(), true, true);
        $eventStart = $wiki->getEvent()->getStartUTC();
        $eventEnd = $wiki->getEvent()->getEndUTC();
        $data = [];

        while ($page = $pages->fetch()) {
            $title = $page['page_title'];

            // FIXME: async?
            $avgPageviews = $pageviewsRepo->getAvgPageviews($domain, [$title], $avgPageviewsOffset);

            $pageInfo = $this->getSinglePageImprovedData(
                $dbName,
                (int)$page['page_id'],
                $title,
                $usernames,
                $eventStart,
                $eventEnd
            );

            $extra = [
                'pageTitle' => $title,
                'wiki' => $domain,
                'avgPageviews' => (int)$avgPageviews,
            ];
            $data[] = array_merge($pageInfo, $extra);
        }

        return $data;
    }
690
691
    /**
     * Build a new PageviewsRepository that uses this repository's logger.
     * @return PageviewsRepository
     */
    private function getPageviewsRepository(): PageviewsRepository
    {
        $pageviewsRepo = new PageviewsRepository();
        $pageviewsRepo->setLogger($this->log);

        return $pageviewsRepo;
    }
701
}
702