1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* This file contains only the PagesRepository class. |
4
|
|
|
*/ |
5
|
|
|
|
6
|
|
|
namespace Xtools; |
7
|
|
|
|
8
|
|
|
use DateInterval; |
9
|
|
|
use Mediawiki\Api\SimpleRequest; |
10
|
|
|
use GuzzleHttp; |
11
|
|
|
|
12
|
|
|
/** |
13
|
|
|
* A PagesRepository fetches data about Pages, either singularly or for multiple. |
14
|
|
|
*/ |
15
|
|
|
class PagesRepository extends Repository |
16
|
|
|
{ |
17
|
|
|
|
18
|
|
|
/** |
19
|
|
|
* Get metadata about a single page from the API. |
20
|
|
|
* @param Project $project The project to which the page belongs. |
21
|
|
|
* @param string $pageTitle Page title. |
22
|
|
|
* @return string[] Array with some of the following keys: pageid, title, missing, displaytitle, |
23
|
|
|
* url. |
24
|
|
|
*/ |
25
|
|
|
public function getPageInfo(Project $project, $pageTitle) |
26
|
|
|
{ |
27
|
|
|
$info = $this->getPagesInfo($project, [$pageTitle]); |
28
|
|
|
return array_shift($info); |
29
|
|
|
} |
30
|
|
|
|
31
|
|
|
/** |
32
|
|
|
* Get metadata about a set of pages from the API. |
33
|
|
|
* @param Project $project The project to which the pages belong. |
34
|
|
|
* @param string[] $pageTitles Array of page titles. |
35
|
|
|
* @return string[] Array keyed by the page names, each element with some of the |
36
|
|
|
* following keys: pageid, title, missing, displaytitle, url. |
37
|
|
|
*/ |
38
|
|
|
public function getPagesInfo(Project $project, $pageTitles) |
39
|
|
|
{ |
40
|
|
|
// @TODO: Also include 'extlinks' prop when we start checking for dead external links. |
41
|
|
|
$params = [ |
42
|
|
|
'prop' => 'info|pageprops', |
43
|
|
|
'inprop' => 'protection|talkid|watched|watchers|notificationtimestamp|subjectid|url|readable|displaytitle', |
44
|
|
|
'converttitles' => '', |
45
|
|
|
// 'ellimit' => 20, |
|
|
|
|
46
|
|
|
// 'elexpandurl' => '', |
|
|
|
|
47
|
|
|
'titles' => join('|', $pageTitles), |
48
|
|
|
'formatversion' => 2 |
49
|
|
|
// 'pageids' => $pageIds // FIXME: allow page IDs |
|
|
|
|
50
|
|
|
]; |
51
|
|
|
|
52
|
|
|
$query = new SimpleRequest('query', $params); |
53
|
|
|
$api = $this->getMediawikiApi($project); |
54
|
|
|
$res = $api->getRequest($query); |
55
|
|
|
$result = []; |
56
|
|
|
if (isset($res['query']['pages'])) { |
57
|
|
|
foreach ($res['query']['pages'] as $pageInfo) { |
58
|
|
|
$result[$pageInfo['title']] = $pageInfo; |
59
|
|
|
} |
60
|
|
|
} |
61
|
|
|
return $result; |
62
|
|
|
} |
63
|
|
|
|
64
|
|
|
/** |
65
|
|
|
* Get revisions of a single page. |
66
|
|
|
* @param Page $page The page. |
67
|
|
|
* @param User|null $user Specify to get only revisions by the given user. |
68
|
|
|
* @return string[] Each member with keys: id, timestamp, length- |
69
|
|
|
*/ |
70
|
|
|
public function getRevisions(Page $page, User $user = null) |
71
|
|
|
{ |
72
|
|
|
$cacheKey = 'revisions.'.$page->getId(); |
73
|
|
|
if ($user) { |
74
|
|
|
$cacheKey .= '.'.$user->getCacheKey(); |
75
|
|
|
} |
76
|
|
|
|
77
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
78
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
79
|
|
|
} |
80
|
|
|
|
81
|
|
|
$this->stopwatch->start($cacheKey, 'XTools'); |
82
|
|
|
|
83
|
|
|
$stmt = $this->getRevisionsStmt($page, $user); |
84
|
|
|
$result = $stmt->fetchAll(); |
85
|
|
|
|
86
|
|
|
// Cache for 10 minutes, and return. |
|
|
|
|
87
|
|
|
$cacheItem = $this->cache->getItem($cacheKey) |
88
|
|
|
->set($result) |
89
|
|
|
->expiresAfter(new DateInterval('PT10M')); |
90
|
|
|
$this->cache->save($cacheItem); |
91
|
|
|
$this->stopwatch->stop($cacheKey); |
92
|
|
|
|
93
|
|
|
return $result; |
94
|
|
|
} |
95
|
|
|
|
96
|
|
|
/** |
97
|
|
|
* Get the statement for a single revision, so that you can iterate row by row. |
98
|
|
|
* @param Page $page The page. |
99
|
|
|
* @param User|null $user Specify to get only revisions by the given user. |
100
|
|
|
* @return Doctrine\DBAL\Driver\PDOStatement |
101
|
|
|
*/ |
102
|
|
View Code Duplication |
public function getRevisionsStmt(Page $page, User $user = null) |
|
|
|
|
103
|
|
|
{ |
104
|
|
|
$revTable = $this->getTableName($page->getProject()->getDatabaseName(), 'revision'); |
105
|
|
|
$userClause = $user ? "revs.rev_user_text in (:username) AND " : ""; |
106
|
|
|
|
107
|
|
|
$sql = "SELECT |
108
|
|
|
revs.rev_id AS id, |
109
|
|
|
revs.rev_timestamp AS timestamp, |
110
|
|
|
revs.rev_minor_edit AS minor, |
111
|
|
|
revs.rev_len AS length, |
112
|
|
|
(CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change, |
113
|
|
|
revs.rev_user AS user_id, |
114
|
|
|
revs.rev_user_text AS username, |
115
|
|
|
revs.rev_comment AS comment |
116
|
|
|
FROM $revTable AS revs |
117
|
|
|
LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id) |
118
|
|
|
WHERE $userClause revs.rev_page = :pageid |
119
|
|
|
ORDER BY revs.rev_timestamp ASC"; |
120
|
|
|
|
121
|
|
|
$params = ['pageid' => $page->getId()]; |
122
|
|
|
if ($user) { |
123
|
|
|
$params['username'] = $user->getUsername(); |
124
|
|
|
} |
125
|
|
|
|
126
|
|
|
$conn = $this->getProjectsConnection(); |
127
|
|
|
return $conn->executeQuery($sql, $params); |
128
|
|
|
} |
129
|
|
|
|
130
|
|
|
/** |
131
|
|
|
* Get a count of the number of revisions of a single page |
132
|
|
|
* @param Page $page The page. |
133
|
|
|
* @param User|null $user Specify to only count revisions by the given user. |
134
|
|
|
* @return int |
135
|
|
|
*/ |
136
|
|
View Code Duplication |
public function getNumRevisions(Page $page, User $user = null) |
|
|
|
|
137
|
|
|
{ |
138
|
|
|
$revTable = $this->getTableName($page->getProject()->getDatabaseName(), 'revision'); |
139
|
|
|
$userClause = $user ? "rev_user_text in (:username) AND " : ""; |
140
|
|
|
|
141
|
|
|
$sql = "SELECT COUNT(*) |
142
|
|
|
FROM $revTable |
143
|
|
|
WHERE $userClause rev_page = :pageid"; |
144
|
|
|
$params = ['pageid' => $page->getId()]; |
145
|
|
|
if ($user) { |
146
|
|
|
$params['username'] = $user->getUsername(); |
147
|
|
|
} |
148
|
|
|
$conn = $this->getProjectsConnection(); |
149
|
|
|
return $conn->executeQuery($sql, $params)->fetchColumn(0); |
150
|
|
|
} |
151
|
|
|
|
152
|
|
|
/** |
153
|
|
|
* Get various basic info used in the API, including the |
154
|
|
|
* number of revisions, unique authors, initial author |
155
|
|
|
* and edit count of the initial author. |
156
|
|
|
* This is combined into one query for better performance. |
157
|
|
|
* Caching is intentionally disabled, because using the gadget, |
158
|
|
|
* this will get hit for a different page constantly, where |
159
|
|
|
* the likelihood of cache benefiting us is slim. |
160
|
|
|
* @param Page $page The page. |
161
|
|
|
* @return string[] |
162
|
|
|
*/ |
163
|
|
|
public function getBasicEditingInfo(Page $page) |
164
|
|
|
{ |
165
|
|
|
$revTable = $this->getTableName($page->getProject()->getDatabaseName(), 'revision'); |
166
|
|
|
$userTable = $this->getTableName($page->getProject()->getDatabaseName(), 'user'); |
167
|
|
|
|
168
|
|
|
$sql = "SELECT *, ( |
169
|
|
|
SELECT user_editcount |
170
|
|
|
FROM $userTable |
171
|
|
|
WHERE user_name = author |
172
|
|
|
) AS author_editcount |
173
|
|
|
FROM ( |
174
|
|
|
( |
175
|
|
|
SELECT COUNT(*) AS num_edits, |
176
|
|
|
COUNT(DISTINCT(rev_user_text)) AS num_editors |
177
|
|
|
FROM $revTable |
178
|
|
|
WHERE rev_page = :pageid |
179
|
|
|
) totals, |
180
|
|
|
( |
181
|
|
|
# With really old pages, the rev_timestamp may need to be sorted ASC, |
182
|
|
|
# and the lowest rev_id may not be the first revision. |
183
|
|
|
SELECT rev_user_text AS author, |
184
|
|
|
rev_timestamp AS created_at |
185
|
|
|
FROM $revTable |
186
|
|
|
WHERE rev_page = :pageid |
187
|
|
|
ORDER BY rev_timestamp ASC |
188
|
|
|
LIMIT 1 |
189
|
|
|
) initial_rev, |
190
|
|
|
( |
191
|
|
|
SELECT MAX(rev_timestamp) AS modified_at |
192
|
|
|
FROM $revTable |
193
|
|
|
WHERE rev_page = :pageid |
194
|
|
|
) last_rev |
195
|
|
|
);"; |
196
|
|
|
$params = ['pageid' => $page->getId()]; |
197
|
|
|
$conn = $this->getProjectsConnection(); |
198
|
|
|
return $conn->executeQuery($sql, $params)->fetch(); |
199
|
|
|
} |
200
|
|
|
|
201
|
|
|
/** |
202
|
|
|
* Get assessment data for the given pages |
203
|
|
|
* @param Project $project The project to which the pages belong. |
204
|
|
|
* @param int[] $pageIds Page IDs |
205
|
|
|
* @return string[] Assessment data as retrieved from the database. |
206
|
|
|
*/ |
207
|
|
|
public function getAssessments(Project $project, $pageIds) |
208
|
|
|
{ |
209
|
|
|
if (!$project->hasPageAssessments()) { |
210
|
|
|
return []; |
211
|
|
|
} |
212
|
|
|
$paTable = $this->getTableName($project->getDatabaseName(), 'page_assessments'); |
213
|
|
|
$papTable = $this->getTableName($project->getDatabaseName(), 'page_assessments_projects'); |
214
|
|
|
$pageIds = implode($pageIds, ','); |
215
|
|
|
|
216
|
|
|
$query = "SELECT pap_project_title AS wikiproject, pa_class AS class, pa_importance AS importance |
217
|
|
|
FROM $paTable |
218
|
|
|
LEFT JOIN $papTable ON pa_project_id = pap_project_id |
219
|
|
|
WHERE pa_page_id IN ($pageIds)"; |
220
|
|
|
|
221
|
|
|
$conn = $this->getProjectsConnection(); |
222
|
|
|
return $conn->executeQuery($query)->fetchAll(); |
223
|
|
|
} |
224
|
|
|
|
225
|
|
|
/** |
226
|
|
|
* Get any CheckWiki errors of a single page |
227
|
|
|
* @param Page $page |
228
|
|
|
* @return array Results from query |
229
|
|
|
*/ |
230
|
|
|
public function getCheckWikiErrors(Page $page) |
231
|
|
|
{ |
232
|
|
|
// Only support mainspace on Labs installations |
233
|
|
|
if ($page->getNamespace() !== 0 || !$this->isLabs()) { |
234
|
|
|
return []; |
235
|
|
|
} |
236
|
|
|
|
237
|
|
|
$sql = "SELECT error, notice, found, name_trans AS name, prio, text_trans AS explanation |
238
|
|
|
FROM s51080__checkwiki_p.cw_error a |
239
|
|
|
JOIN s51080__checkwiki_p.cw_overview_errors b |
240
|
|
|
WHERE a.project = b.project |
241
|
|
|
AND a.project = :dbName |
242
|
|
|
AND a.title = :title |
243
|
|
|
AND a.error = b.id |
244
|
|
|
AND a.ok = 0"; |
245
|
|
|
|
246
|
|
|
// remove _p if present |
247
|
|
|
$dbName = preg_replace('/_p$/', '', $page->getProject()->getDatabaseName()); |
248
|
|
|
|
249
|
|
|
// Page title without underscores (str_replace just to be sure) |
250
|
|
|
$pageTitle = str_replace('_', ' ', $page->getTitle()); |
251
|
|
|
|
252
|
|
|
$resultQuery = $this->getToolsConnection()->prepare($sql); |
253
|
|
|
$resultQuery->bindParam(':dbName', $dbName); |
254
|
|
|
$resultQuery->bindParam(':title', $pageTitle); |
255
|
|
|
$resultQuery->execute(); |
256
|
|
|
|
257
|
|
|
return $resultQuery->fetchAll(); |
258
|
|
|
} |
259
|
|
|
|
260
|
|
|
/** |
261
|
|
|
* Get basic wikidata on the page: label and description. |
262
|
|
|
* @param Page $page |
263
|
|
|
* @return string[] In the format: |
264
|
|
|
* [[ |
265
|
|
|
* 'term' => string such as 'label', |
266
|
|
|
* 'term_text' => string (value for 'label'), |
267
|
|
|
* ], ... ] |
268
|
|
|
*/ |
269
|
|
|
public function getWikidataInfo(Page $page) |
270
|
|
|
{ |
271
|
|
|
if (empty($page->getWikidataId())) { |
272
|
|
|
return []; |
273
|
|
|
} |
274
|
|
|
|
275
|
|
|
$wikidataId = ltrim($page->getWikidataId(), 'Q'); |
276
|
|
|
$lang = $page->getProject()->getLang(); |
277
|
|
|
|
278
|
|
|
$sql = "SELECT IF(term_type = 'label', 'label', 'description') AS term, term_text |
279
|
|
|
FROM wikidatawiki_p.wb_entity_per_page |
280
|
|
|
JOIN wikidatawiki_p.page ON epp_page_id = page_id |
281
|
|
|
JOIN wikidatawiki_p.wb_terms ON term_entity_id = epp_entity_id |
282
|
|
|
AND term_language = :lang |
283
|
|
|
AND term_type IN ('label', 'description') |
284
|
|
|
WHERE epp_entity_id = :wikidataId |
285
|
|
|
|
286
|
|
|
UNION |
287
|
|
|
|
288
|
|
|
SELECT pl_title AS term, wb_terms.term_text |
289
|
|
|
FROM wikidatawiki_p.pagelinks |
290
|
|
|
JOIN wikidatawiki_p.wb_terms ON term_entity_id = SUBSTRING(pl_title, 2) |
291
|
|
|
AND term_entity_type = (IF(SUBSTRING(pl_title, 1, 1) = 'Q', 'item', 'property')) |
292
|
|
|
AND term_language = :lang |
293
|
|
|
AND term_type = 'label' |
294
|
|
|
WHERE pl_namespace IN (0, 120) |
295
|
|
|
AND pl_from = ( |
296
|
|
|
SELECT page_id FROM wikidatawiki_p.page |
297
|
|
|
WHERE page_namespace = 0 |
298
|
|
|
AND page_title = 'Q:wikidataId' |
299
|
|
|
)"; |
300
|
|
|
|
301
|
|
|
$resultQuery = $this->getProjectsConnection()->prepare($sql); |
302
|
|
|
$resultQuery->bindParam(':lang', $lang); |
303
|
|
|
$resultQuery->bindParam(':wikidataId', $wikidataId); |
304
|
|
|
$resultQuery->execute(); |
305
|
|
|
|
306
|
|
|
return $resultQuery->fetchAll(); |
307
|
|
|
} |
308
|
|
|
|
309
|
|
|
/** |
310
|
|
|
* Get or count all wikidata items for the given page, |
311
|
|
|
* not just languages of sister projects |
312
|
|
|
* @param Page $page |
313
|
|
|
* @param bool $count Set to true to get only a COUNT |
314
|
|
|
* @return string[]|int Records as returend by the DB, |
315
|
|
|
* or raw COUNT of the records. |
316
|
|
|
*/ |
317
|
|
|
public function getWikidataItems(Page $page, $count = false) |
318
|
|
|
{ |
319
|
|
|
if (!$page->getWikidataId()) { |
320
|
|
|
return $count ? 0 : []; |
321
|
|
|
} |
322
|
|
|
|
323
|
|
|
$wikidataId = ltrim($page->getWikidataId(), 'Q'); |
324
|
|
|
|
325
|
|
|
$sql = "SELECT " . ($count ? 'COUNT(*) AS count' : '*') . " |
326
|
|
|
FROM wikidatawiki_p.wb_items_per_site |
327
|
|
|
WHERE ips_item_id = :wikidataId"; |
328
|
|
|
|
329
|
|
|
$resultQuery = $this->getProjectsConnection()->prepare($sql); |
330
|
|
|
$resultQuery->bindParam(':wikidataId', $wikidataId); |
331
|
|
|
$resultQuery->execute(); |
332
|
|
|
|
333
|
|
|
$result = $resultQuery->fetchAll(); |
334
|
|
|
|
335
|
|
|
return $count ? (int) $result[0]['count'] : $result; |
336
|
|
|
} |
337
|
|
|
|
338
|
|
|
/** |
339
|
|
|
* Get number of in and outgoing links and redirects to the given page. |
340
|
|
|
* @param Page $page |
341
|
|
|
* @return string[] Counts with the keys 'links_ext_count', 'links_out_count', |
342
|
|
|
* 'links_in_count' and 'redirects_count' |
343
|
|
|
*/ |
344
|
|
|
public function countLinksAndRedirects(Page $page) |
345
|
|
|
{ |
346
|
|
|
$externalLinksTable = $this->getTableName($page->getProject()->getDatabaseName(), 'externallinks'); |
347
|
|
|
$pageLinksTable = $this->getTableName($page->getProject()->getDatabaseName(), 'pagelinks'); |
348
|
|
|
$redirectTable = $this->getTableName($page->getProject()->getDatabaseName(), 'redirect'); |
349
|
|
|
|
350
|
|
|
$sql = "SELECT COUNT(*) AS value, 'links_ext' AS type |
351
|
|
|
FROM $externalLinksTable WHERE el_from = :id |
352
|
|
|
UNION |
353
|
|
|
SELECT COUNT(*) AS value, 'links_out' AS type |
354
|
|
|
FROM $pageLinksTable WHERE pl_from = :id |
355
|
|
|
UNION |
356
|
|
|
SELECT COUNT(*) AS value, 'links_in' AS type |
357
|
|
|
FROM $pageLinksTable WHERE pl_namespace = :namespace AND pl_title = :title |
358
|
|
|
UNION |
359
|
|
|
SELECT COUNT(*) AS value, 'redirects' AS type |
360
|
|
|
FROM $redirectTable WHERE rd_namespace = :namespace AND rd_title = :title"; |
361
|
|
|
|
362
|
|
|
$params = [ |
363
|
|
|
'id' => $page->getId(), |
364
|
|
|
'title' => str_replace(' ', '_', $page->getTitle()), |
365
|
|
|
'namespace' => $page->getNamespace(), |
366
|
|
|
]; |
367
|
|
|
|
368
|
|
|
$conn = $this->getProjectsConnection(); |
369
|
|
|
$res = $conn->executeQuery($sql, $params); |
370
|
|
|
|
371
|
|
|
$data = []; |
372
|
|
|
|
373
|
|
|
// Transform to associative array by 'type' |
374
|
|
|
foreach ($res as $row) { |
375
|
|
|
$data[$row['type'] . '_count'] = $row['value']; |
376
|
|
|
} |
377
|
|
|
|
378
|
|
|
return $data; |
379
|
|
|
} |
380
|
|
|
|
381
|
|
|
/** |
382
|
|
|
* Count wikidata items for the given page, not just languages of sister projects |
383
|
|
|
* @param Page $page |
384
|
|
|
* @return int Number of records. |
385
|
|
|
*/ |
386
|
|
|
public function countWikidataItems(Page $page) |
387
|
|
|
{ |
388
|
|
|
return $this->getWikidataItems($page, true); |
389
|
|
|
} |
390
|
|
|
|
391
|
|
|
/** |
392
|
|
|
* Get page views for the given page and timeframe. |
393
|
|
|
* @param Page $page |
394
|
|
|
* @param string|DateTime $start In the format YYYYMMDD |
395
|
|
|
* @param string|DateTime $end In the format YYYYMMDD |
396
|
|
|
* @return string[] |
397
|
|
|
*/ |
398
|
|
|
public function getPageviews($page, $start, $end) |
399
|
|
|
{ |
400
|
|
|
$title = rawurldecode(str_replace(' ', '_', $page->getTitle())); |
401
|
|
|
$client = new GuzzleHttp\Client(); |
402
|
|
|
|
403
|
|
|
if ($start instanceof DateTime) { |
|
|
|
|
404
|
|
|
$start = $start->format('YYYYMMDD'); |
405
|
|
|
} |
406
|
|
|
if ($end instanceof DateTime) { |
|
|
|
|
407
|
|
|
$end = $end->format('YYYYMMDD'); |
408
|
|
|
} |
409
|
|
|
|
410
|
|
|
$project = $page->getProject()->getDomain(); |
411
|
|
|
|
412
|
|
|
$url = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/' . |
413
|
|
|
"$project/all-access/user/$title/daily/$start/$end"; |
414
|
|
|
|
415
|
|
|
$res = $client->request('GET', $url); |
416
|
|
|
return json_decode($res->getBody()->getContents(), true); |
417
|
|
|
} |
418
|
|
|
} |
419
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.