Passed
Push — master ( 8261a5...b5172d )
by MusikAnimal
04:21
created

PageRepository::getNumRevisions()   B

Complexity

Conditions 4
Paths 5

Size

Total Lines 27
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 18
nc 5
nop 2
dl 0
loc 27
rs 8.5806
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the PageRepository class.
4
 */
5
6
namespace Xtools;
7
8
use DateInterval;
9
use Mediawiki\Api\SimpleRequest;
10
use GuzzleHttp;
11
12
/**
13
 * A PageRepository fetches data about Pages, either for a single page or for several at once.
14
 * Despite the name, this does not have a direct correlation with the Pages tool.
15
 * @codeCoverageIgnore
16
 */
17
class PageRepository extends Repository
18
{
19
20
    /**
     * Fetch API metadata for one page.
     *
     * This is a thin wrapper that asks getPagesInfo() about a one-element list of titles
     * and hands back the first entry of its result.
     * @param Project $project The project to which the page belongs.
     * @param string $pageTitle Page title.
     * @return string[] Array with some of the following keys: pageid, title, missing, displaytitle,
     * url. Null is returned when getPagesInfo() produced no entries.
     */
    public function getPageInfo(Project $project, $pageTitle)
    {
        $pages = $this->getPagesInfo($project, [$pageTitle]);

        // The result is keyed by the title reported by the API, so take the first element
        // positionally rather than looking it up by $pageTitle.
        return array_shift($pages);
    }
32
33
    /**
     * Fetch API metadata for several pages in a single 'query' request.
     * @param Project $project The project to which the pages belong.
     * @param string[] $pageTitles Array of page titles.
     * @return string[] Array keyed by the page names (as reported by the API), each element
     * with some of the following keys: pageid, title, missing, displaytitle, url.
     * Empty when the response carries no 'pages' section.
     */
    public function getPagesInfo(Project $project, $pageTitles)
    {
        // @TODO: Also include 'extlinks' prop (with 'ellimit' => 20 and 'elexpandurl' => '')
        // when we start checking for dead external links.
        // FIXME: allow page IDs ('pageids') as an alternative to titles.
        $infoProps = [
            'protection',
            'talkid',
            'watched',
            'watchers',
            'notificationtimestamp',
            'subjectid',
            'url',
            'readable',
            'displaytitle',
        ];

        $request = new SimpleRequest('query', [
            'prop' => 'info|pageprops',
            'inprop' => implode('|', $infoProps),
            'converttitles' => '',
            'titles' => implode('|', $pageTitles),
            'formatversion' => 2
        ]);
        $response = $this->getMediawikiApi($project)->getRequest($request);

        if (!isset($response['query']['pages'])) {
            return [];
        }

        // Re-key the list of pages by title.
        $pagesByTitle = [];
        foreach ($response['query']['pages'] as $pageInfo) {
            $pagesByTitle[$pageInfo['title']] = $pageInfo;
        }

        return $pagesByTitle;
    }
65
66
    /**
     * Fetch the current wikitext of several pages in a single 'query' request.
     * @param Project $project The project to which the pages belong.
     * @param string[] $pageTitles Array of page titles.
     * @return string[] Array keyed by the page names, with the page text as the values.
     * Pages for which the response has no revision content map to an empty string.
     */
    public function getPagesWikitext(Project $project, $pageTitles)
    {
        $request = new SimpleRequest('query', [
            'prop' => 'revisions',
            'rvprop' => 'content',
            'titles' => implode('|', $pageTitles),
            'formatversion' => 2,
        ]);
        $response = $this->getMediawikiApi($project)->getRequest($request);

        if (!isset($response['query']['pages'])) {
            return [];
        }

        $wikitextByTitle = [];
        foreach ($response['query']['pages'] as $page) {
            $wikitextByTitle[$page['title']] = isset($page['revisions'][0]['content'])
                ? $page['revisions'][0]['content']
                : '';
        }

        return $wikitextByTitle;
    }
99
100
    /**
     * Fetch all revisions of one page, optionally restricted to one user.
     *
     * The rows come from getRevisionsStmt() and are cached for 10 minutes under a key
     * derived from the arguments.
     * @param Page $page The page.
     * @param User|null $user Specify to get only revisions by the given user.
     * @return string[] Each member is a row as selected by getRevisionsStmt(), with keys
     *   including id, timestamp and length.
     */
    public function getRevisions(Page $page, User $user = null)
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_revisions');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        // The cache key doubles as the stopwatch event name.
        $this->stopwatch->start($cacheKey, 'XTools');

        $revisions = $this->getRevisionsStmt($page, $user)->fetchAll();

        // Cache for 10 minutes, and return.
        $cacheItem = $this->cache->getItem($cacheKey);
        $cacheItem->set($revisions);
        $cacheItem->expiresAfter(new DateInterval('PT10M'));
        $this->cache->save($cacheItem);

        $this->stopwatch->stop($cacheKey);

        return $revisions;
    }
127
128
    /**
     * Get the statement for the revisions of a single page, so that you can iterate row by row.
     *
     * Rows are ordered by rev_timestamp ascending. When a positive $limit is given, only the
     * most recent $limit revisions are selected (still in ascending order), which is done by
     * using (number of revisions - $limit) as the OFFSET.
     * @param Page $page The page.
     * @param User|null $user Specify to get only revisions by the given user.
     * @param int $limit Max number of revisions to process. Ignored unless greater than zero.
     * @param int $numRevisions Number of revisions, if known. This is used solely to determine the
     *   OFFSET if we are given a $limit (see above). If $limit is set and $numRevisions is not set,
     *   a separate query is run (via getNumRevisions()) to get the number of revisions.
     * @return \Doctrine\DBAL\Driver\PDOStatement
     */
    public function getRevisionsStmt(Page $page, User $user = null, $limit = null, $numRevisions = null)
    {
        $revTable = $this->getTableName($page->getProject()->getDatabaseName(), 'revision');
        $userClause = $user ? "revs.rev_user_text in (:username) AND " : "";

        // This sorts ascending by rev_timestamp because ArticleInfo must start with the oldest
        // revision and work its way forward for proper processing. Consequently, if we want to do
        // a LIMIT we want the most recent revisions, so we also need to know the total count to
        // supply as the OFFSET.
        $limitClause = '';
        // Cast before interpolating so that only an integer can ever reach the SQL text.
        $limit = intval($limit);
        if ($limit > 0) {
            if (!isset($numRevisions)) {
                $numRevisions = $this->getNumRevisions($page, $user);
            }

            // Clamp at zero: a negative OFFSET is invalid SQL, and when there are fewer
            // revisions than $limit we simply want all of them.
            $offset = max(0, intval($numRevisions) - $limit);
            $limitClause = "LIMIT $offset, $limit";
        }

        $sql = "SELECT
                    revs.rev_id AS id,
                    revs.rev_timestamp AS timestamp,
                    revs.rev_minor_edit AS minor,
                    revs.rev_len AS length,
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                    revs.rev_user AS user_id,
                    revs.rev_user_text AS username,
                    revs.rev_comment AS comment
                FROM $revTable AS revs
                LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
                WHERE $userClause revs.rev_page = :pageid
                ORDER BY revs.rev_timestamp ASC
                $limitClause";

        $params = ['pageid' => $page->getId()];
        if ($user) {
            $params['username'] = $user->getUsername();
        }

        $conn = $this->getProjectsConnection();
        return $conn->executeQuery($sql, $params);
    }
176
177
    /**
     * Get a count of the number of revisions of a single page.
     *
     * The count is cached for 10 minutes under a key derived from the arguments.
     * @param Page $page The page.
     * @param User|null $user Specify to only count revisions by the given user.
     * @return int
     */
    public function getNumRevisions(Page $page, User $user = null)
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_numrevisions');
        if ($this->cache->hasItem($cacheKey)) {
            // Cast here as well, in case an older cache entry still holds the raw DB string.
            return (int) $this->cache->getItem($cacheKey)->get();
        }

        // Resolve the table name the same way every other method of this repository does.
        $revTable = $this->getTableName($page->getProject()->getDatabaseName(), 'revision');
        $userClause = $user ? "rev_user_text in (:username) AND " : "";

        $sql = "SELECT COUNT(*)
                FROM $revTable
                WHERE $userClause rev_page = :pageid";
        $params = ['pageid' => $page->getId()];
        if ($user) {
            $params['username'] = $user->getUsername();
        }

        $conn = $this->getProjectsConnection();
        // The driver hands the count back as a string; callers are promised an int.
        $result = (int) $conn->executeQuery($sql, $params)->fetchColumn(0);

        // Cache for 10 minutes, and return.
        $cacheItem = $this->cache->getItem($cacheKey)
            ->set($result)
            ->expiresAfter(new DateInterval('PT10M'));
        $this->cache->save($cacheItem);
        return $result;
    }
211
212
    /**
     * Get various basic info used in the API: the number of revisions and unique editors,
     *   the initial author with the timestamp and ID of the first revision, the timestamp
     *   of the latest revision, the page's latest revision ID, and the edit count of the
     *   initial author.
     * Everything is gathered in a single query for better performance.
     * The result is only cached when the query was slow (more than 5 seconds),
     *   because with the gadget this gets hit for a different page constantly,
     *   so the likelihood of the cache benefiting us is slim.
     * @param Page $page The page.
     * @return string[]
     */
    public function getBasicEditingInfo(Page $page)
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $dbName = $page->getProject()->getDatabaseName();
        $revTable = $this->getTableName($dbName, 'revision');
        $userTable = $this->getTableName($dbName, 'user');
        $pageTable = $this->getTableName($dbName, 'page');

        $sql = "SELECT *, (
                   SELECT user_editcount
                   FROM $userTable
                   WHERE user_name = author
                ) AS author_editcount
                FROM (
                    (
                        SELECT COUNT(*) AS num_edits,
                               COUNT(DISTINCT(rev_user_text)) AS num_editors
                        FROM $revTable
                        WHERE rev_page = :pageid
                    ) a,
                    (
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
                        #   and the lowest rev_id may not be the first revision.
                        SELECT rev_user_text AS author,
                               rev_timestamp AS created_at,
                               rev_id AS created_rev_id
                        FROM $revTable
                        WHERE rev_page = :pageid
                        ORDER BY rev_timestamp ASC
                        LIMIT 1
                    ) b,
                    (
                        SELECT MAX(rev_timestamp) AS modified_at
                        FROM $revTable
                        WHERE rev_page = :pageid
                    ) c,
                    (
                        SELECT page_latest AS modified_rev_id
                        FROM $pageTable
                        WHERE page_id = :pageid
                    ) d
                );";
        $params = ['pageid' => $page->getId()];
        $conn = $this->getProjectsConnection();

        // Time the query (whole seconds) so we can decide whether or not to cache the result.
        $startedAt = time();
        $info = $conn->executeQuery($sql, $params)->fetch();
        $elapsedSeconds = time() - $startedAt;

        // If it took over 5 seconds, cache the result for 20 minutes.
        if ($elapsedSeconds > 5) {
            $cacheItem = $this->cache->getItem($cacheKey);
            $cacheItem->set($info);
            $cacheItem->expiresAfter(new DateInterval('PT20M'));
            $this->cache->save($cacheItem);
        }

        return $info;
    }
287
288
    /**
     * Get assessment data for the given pages.
     *
     * The result is cached for 10 minutes under a key derived from the arguments.
     * @param Project $project The project to which the pages belong.
     * @param int[] $pageIds Page IDs. Values are cast to integers; zero values are dropped.
     * @return string[] Assessment data as retrieved from the database, one row per
     *   assessment with the keys 'wikiproject', 'class' and 'importance'. Empty if the
     *   project has no page assessments or no usable page IDs were given.
     */
    public function getAssessments(Project $project, $pageIds)
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_assessments');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        if (!$project->hasPageAssessments()) {
            return [];
        }

        // The IDs are interpolated into the SQL below, so force them to integers first.
        $pageIds = array_filter(array_map('intval', (array) $pageIds));
        if (empty($pageIds)) {
            // "IN ()" is a syntax error, and there is nothing to look up anyway.
            return [];
        }

        $paTable = $this->getTableName($project->getDatabaseName(), 'page_assessments');
        $papTable = $this->getTableName($project->getDatabaseName(), 'page_assessments_projects');
        // Separator first: the reversed (legacy) argument order is not accepted by PHP 8.
        $pageIds = implode(',', $pageIds);

        $query = "SELECT pap_project_title AS wikiproject, pa_class AS class, pa_importance AS importance
                  FROM $paTable
                  LEFT JOIN $papTable ON pa_project_id = pap_project_id
                  WHERE pa_page_id IN ($pageIds)";

        $conn = $this->getProjectsConnection();
        $result = $conn->executeQuery($query)->fetchAll();

        // Cache for 10 minutes, and return.
        $cacheItem = $this->cache->getItem($cacheKey)
            ->set($result)
            ->expiresAfter(new DateInterval('PT10M'));
        $this->cache->save($cacheItem);
        return $result;
    }
323
324
    /**
     * Look up the open CheckWiki errors recorded for one page.
     *
     * Only mainspace pages (namespace 0) on Labs installations are supported; anything
     * else yields an empty array without touching the database.
     * @param Page $page
     * @return array Results from query
     */
    public function getCheckWikiErrors(Page $page)
    {
        // Only support mainspace on Labs installations
        $isSupported = $page->getNamespace() === 0 && $this->isLabs();
        if (!$isSupported) {
            return [];
        }

        // The CheckWiki tables identify the project without the _p suffix, if present.
        $projectDb = preg_replace('/_p$/', '', $page->getProject()->getDatabaseName());

        // Page title without underscores (str_replace just to be sure)
        $titleWithSpaces = str_replace('_', ' ', $page->getTitle());

        $sql = "SELECT error, notice, found, name_trans AS name, prio, text_trans AS explanation
                FROM s51080__checkwiki_p.cw_error a
                JOIN s51080__checkwiki_p.cw_overview_errors b
                WHERE a.project = b.project
                AND a.project = :dbName
                AND a.title = :title
                AND a.error = b.id
                AND a.ok = 0";

        $stmt = $this->getToolsConnection()->prepare($sql);
        $stmt->bindParam(':dbName', $projectDb);
        $stmt->bindParam(':title', $titleWithSpaces);
        $stmt->execute();

        return $stmt->fetchAll();
    }
358
359
    /**
     * Fetch the Wikidata label and description of the page, in the project's language.
     * @param Page $page
     * @return string[] Empty if the page has no Wikidata ID, otherwise in the format:
     *    [[
     *         'term' => string such as 'label',
     *         'term_text' => string (value for 'label'),
     *     ], ... ]
     */
    public function getWikidataInfo(Page $page)
    {
        $entityId = $page->getWikidataId();
        if (empty($entityId)) {
            return [];
        }

        // The query below takes the ID without its leading 'Q'.
        $numericId = ltrim($entityId, 'Q');
        $language = $page->getProject()->getLang();

        $sql = "SELECT term_type AS term, term_text
                FROM wikidatawiki_p.wb_terms
                WHERE term_entity_id = :wikidataId
                AND term_type IN ('label', 'description')
                AND term_language = :lang";

        $stmt = $this->getProjectsConnection()->prepare($sql);
        $stmt->bindParam(':lang', $language);
        $stmt->bindParam(':wikidataId', $numericId);
        $stmt->execute();

        return $stmt->fetchAll();
    }
390
391
    /**
     * Get or count all Wikidata items (site links) for the given page,
     *     not just languages of sister projects.
     * @param Page $page
     * @param bool $count Set to true to get only a COUNT
     * @return string[]|int Records as returned by the DB,
     *                      or raw COUNT of the records. For a page without a
     *                      Wikidata ID this is [] or 0 respectively.
     */
    public function getWikidataItems(Page $page, $count = false)
    {
        $entityId = $page->getWikidataId();
        if (!$entityId) {
            return $count ? 0 : [];
        }

        // The query below takes the ID without its leading 'Q'.
        $numericId = ltrim($entityId, 'Q');

        $columns = $count ? 'COUNT(*) AS count' : '*';
        $sql = "SELECT " . $columns . "
                FROM wikidatawiki_p.wb_items_per_site
                WHERE ips_item_id = :wikidataId";

        $stmt = $this->getProjectsConnection()->prepare($sql);
        $stmt->bindParam(':wikidataId', $numericId);
        $stmt->execute();

        $rows = $stmt->fetchAll();

        if ($count) {
            return (int) $rows[0]['count'];
        }

        return $rows;
    }
419
420
    /**
     * Count the external links, outgoing links, incoming links and redirects of a page.
     *
     * All four counts are gathered with one UNION query, each sub-select tagging its
     * count with a type that becomes the key prefix in the result.
     * @param Page $page
     * @return string[] Counts with the keys 'links_ext_count', 'links_out_count',
     *                  'links_in_count' and 'redirects_count'
     */
    public function countLinksAndRedirects(Page $page)
    {
        $dbName = $page->getProject()->getDatabaseName();
        $externalLinksTable = $this->getTableName($dbName, 'externallinks');
        $pageLinksTable = $this->getTableName($dbName, 'pagelinks');
        $redirectTable = $this->getTableName($dbName, 'redirect');

        $sql = "SELECT COUNT(*) AS value, 'links_ext' AS type
                FROM $externalLinksTable WHERE el_from = :id
                UNION
                SELECT COUNT(*) AS value, 'links_out' AS type
                FROM $pageLinksTable WHERE pl_from = :id
                UNION
                SELECT COUNT(*) AS value, 'links_in' AS type
                FROM $pageLinksTable WHERE pl_namespace = :namespace AND pl_title = :title
                UNION
                SELECT COUNT(*) AS value, 'redirects' AS type
                FROM $redirectTable WHERE rd_namespace = :namespace AND rd_title = :title";

        // The title is matched in its underscore form.
        $queryParams = [
            'id' => $page->getId(),
            'title' => str_replace(' ', '_', $page->getTitleWithoutNamespace()),
            'namespace' => $page->getNamespace(),
        ];

        $rows = $this->getProjectsConnection()->executeQuery($sql, $queryParams);

        // Transform to associative array by 'type'
        $counts = [];
        foreach ($rows as $row) {
            $key = $row['type'] . '_count';
            $counts[$key] = $row['value'];
        }

        return $counts;
    }
462
463
    /**
     * Count all Wikidata items for the given page, not just languages of sister projects.
     *
     * Convenience wrapper around getWikidataItems() in count-only mode.
     * @param Page $page
     * @return int Number of records.
     */
    public function countWikidataItems(Page $page)
    {
        $countOnly = true;

        return $this->getWikidataItems($page, $countOnly);
    }
472
473
    /**
     * Get page views for the given page and timeframe.
     *
     * Requests daily, user-agent 'user', all-access per-article figures from the Wikimedia
     * REST API and returns the decoded JSON response.
     * @FIXME use Symfony Guzzle package.
     * @param Page $page
     * @param string|\DateTimeInterface $start In the format YYYYMMDD, or a date object.
     * @param string|\DateTimeInterface $end In the format YYYYMMDD, or a date object.
     * @return string[]
     */
    public function getPageviews(Page $page, $start, $end)
    {
        $title = rawurlencode(str_replace(' ', '_', $page->getTitle()));
        $client = new GuzzleHttp\Client();

        // The leading backslash matters: inside this namespace an unqualified name would
        // resolve to a non-existent Xtools\ class and the check would never match.
        // 'Ymd' is PHP's format string for YYYYMMDD.
        if ($start instanceof \DateTimeInterface) {
            $start = $start->format('Ymd');
        }
        if ($end instanceof \DateTimeInterface) {
            $end = $end->format('Ymd');
        }

        $project = $page->getProject()->getDomain();

        $url = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/' .
            "$project/all-access/user/$title/daily/$start/$end";

        $res = $client->request('GET', $url);
        return json_decode($res->getBody()->getContents(), true);
    }
501
}
502