|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* This file contains only the PagesRepository class. |
|
4
|
|
|
*/ |
|
5
|
|
|
|
|
6
|
|
|
namespace Xtools; |
|
7
|
|
|
|
|
8
|
|
|
use DateInterval; |
|
9
|
|
|
use Mediawiki\Api\SimpleRequest; |
|
10
|
|
|
|
|
11
|
|
|
/**
 * A PagesRepository fetches data about Pages, either singularly or for multiple.
 *
 * Page metadata comes from the MediaWiki API; revision, assessment, CheckWiki and
 * Wikidata information is read through the database connections of the parent Repository.
 */
class PagesRepository extends Repository
{

    /**
     * Get metadata about a single page from the API.
     * @param Project $project The project to which the page belongs.
     * @param string $pageTitle Page title.
     * @return string[]|null Array with some of the following keys: pageid, title, missing, displaytitle,
     *   url. Null if the API response contained no pages.
     */
    public function getPageInfo(Project $project, $pageTitle)
    {
        $info = $this->getPagesInfo($project, [$pageTitle]);

        // array_shift() yields null for an empty result set.
        return array_shift($info);
    }

    /**
     * Get metadata about a set of pages from the API.
     * @param Project $project The project to which the pages belong.
     * @param string[] $pageTitles Array of page titles.
     * @return string[] Array keyed by the page names, each element with some of the
     *   following keys: pageid, title, missing, displaytitle, url.
     */
    public function getPagesInfo(Project $project, $pageTitles)
    {
        // Nothing to look up, so skip the API round trip.
        if (empty($pageTitles)) {
            return [];
        }

        // @TODO: Also include 'extlinks' prop when we start checking for dead external links.
        // @TODO: Allow looking pages up by page ID as well as by title.
        $params = [
            'prop' => 'info|pageprops',
            'inprop' => 'protection|talkid|watched|watchers|notificationtimestamp|subjectid|url|readable|displaytitle',
            'converttitles' => '',
            'titles' => implode('|', $pageTitles),
            'formatversion' => 2,
        ];

        $query = new SimpleRequest('query', $params);
        $api = $this->getMediawikiApi($project);
        $res = $api->getRequest($query);

        // Re-key the returned pages by their title.
        $result = [];
        if (isset($res['query']['pages'])) {
            foreach ($res['query']['pages'] as $pageInfo) {
                $result[$pageInfo['title']] = $pageInfo;
            }
        }

        return $result;
    }

    /**
     * Get revisions of a single page, oldest first. Results are cached for 10 minutes.
     * @param Page $page The page.
     * @param User|null $user Specify to get only revisions by the given user.
     * @return string[] Each member with keys: id, timestamp, minor, length, length_change,
     *   user_id, username, comment.
     */
    public function getRevisions(Page $page, User $user = null)
    {
        $cacheKey = 'revisions.'.$page->getId();
        if ($user) {
            // Hash the username: PSR-6 reserves the characters {}()/\@: in cache keys,
            // and a username is free-form text that may contain them.
            $cacheKey .= '.'.md5($user->getUsername());
        }

        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $this->stopwatch->start($cacheKey, 'XTools');

        $revTable = $this->getTableName($page->getProject()->getDatabaseName(), 'revision');
        $userClause = $user ? "revs.rev_user_text in (:username) AND " : "";

        // The self-join on the parent revision is what lets us compute the size change of each edit.
        $query = "SELECT
                    revs.rev_id AS id,
                    revs.rev_timestamp AS timestamp,
                    revs.rev_minor_edit AS minor,
                    revs.rev_len AS length,
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                    revs.rev_user AS user_id,
                    revs.rev_user_text AS username,
                    revs.rev_comment AS comment
                FROM $revTable AS revs
                LEFT JOIN $revTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
                WHERE $userClause revs.rev_page = :pageid
                ORDER BY revs.rev_timestamp ASC
            ";
        $params = ['pageid' => $page->getId()];
        if ($user) {
            $params['username'] = $user->getUsername();
        }
        $conn = $this->getProjectsConnection();
        $result = $conn->executeQuery($query, $params)->fetchAll();

        // Cache for 10 minutes, and return.
        $cacheItem = $this->cache->getItem($cacheKey)
            ->set($result)
            ->expiresAfter(new DateInterval('PT10M'));
        $this->cache->save($cacheItem);
        $this->stopwatch->stop($cacheKey);

        return $result;
    }

    /**
     * Get a count of the number of revisions of a single page
     * @param Page $page The page.
     * @param User|null $user Specify to only count revisions by the given user.
     * @return int
     */
    public function getNumRevisions(Page $page, User $user = null)
    {
        $revTable = $this->getTableName($page->getProject()->getDatabaseName(), 'revision');
        $userClause = $user ? "rev_user_text in (:username) AND " : "";

        $query = "SELECT COUNT(*)
            FROM $revTable
            WHERE $userClause rev_page = :pageid
            ";
        $params = ['pageid' => $page->getId()];
        if ($user) {
            $params['username'] = $user->getUsername();
        }
        $conn = $this->getProjectsConnection();

        // The driver hands the count back as a string; honour the documented int return.
        return (int) $conn->executeQuery($query, $params)->fetchColumn(0);
    }

    /**
     * Get assessment data for the given pages
     * @param Project $project The project to which the pages belong.
     * @param int[] $pageIds Page IDs
     * @return string[] Assessment data as retrieved from the database. Empty if the
     *   project has no page assessments or no page IDs were given.
     */
    public function getAssessments(Project $project, $pageIds)
    {
        if (!$project->hasPageAssessments()) {
            return [];
        }

        // The IDs are interpolated into the IN() list below, so force them to integers.
        $pageIds = array_map('intval', $pageIds);
        if (empty($pageIds)) {
            // "IN ()" is not valid SQL; there is nothing to fetch anyway.
            return [];
        }
        $idList = implode(',', $pageIds);

        $dbName = $project->getDatabaseName();
        $paTable = $this->getTableName($dbName, 'page_assessments');
        $papTable = $this->getTableName($dbName, 'page_assessments_projects');

        $query = "SELECT pap_project_title AS wikiproject, pa_class AS class, pa_importance AS importance
                  FROM $paTable
                  LEFT JOIN $papTable ON pa_project_id = pap_project_id
                  WHERE pa_page_id IN ($idList)";

        $conn = $this->getProjectsConnection();
        return $conn->executeQuery($query)->fetchAll();
    }

    /**
     * Get any CheckWiki errors of a single page
     * @param Page $page
     * @return array Results from query. Always empty for pages outside namespace 0
     *   and when not running on Labs.
     */
    public function getCheckWikiErrors(Page $page)
    {
        // Only support mainspace on Labs installations
        if ($page->getNamespace() !== 0 || !$this->isLabs()) {
            return [];
        }

        $sql = "SELECT error, notice, found, name_trans AS name, prio, text_trans AS explanation
                FROM s51080__checkwiki_p.cw_error a
                JOIN s51080__checkwiki_p.cw_overview_errors b
                WHERE a.project = b.project
                AND a.project = :dbName
                AND a.title = :title
                AND a.error = b.id
                AND a.ok = 0";

        // remove _p if present
        $dbName = preg_replace('/_p$/', '', $page->getProject()->getDatabaseName());

        // Page title without underscores (str_replace just to be sure)
        $pageTitle = str_replace('_', ' ', $page->getTitle());

        $resultQuery = $this->getToolsConnection()->prepare($sql);
        $resultQuery->bindParam(':dbName', $dbName);
        $resultQuery->bindParam(':title', $pageTitle);
        $resultQuery->execute();

        return $resultQuery->fetchAll();
    }

    /**
     * Get basic wikidata on the page: label and description.
     * The result also includes the labels of the entities linked from the item's page.
     * @param Page $page
     * @return string[] In the format:
     *    [[
     *         'term' => string such as 'label',
     *         'term_text' => string (value for 'label'),
     *     ], ... ]
     */
    public function getWikidataInfo(Page $page)
    {
        if (empty($page->getWikidataId())) {
            return [];
        }

        // Numeric part of the ID, as compared against the *_entity_id columns.
        $wikidataId = ltrim($page->getWikidataId(), 'Q');
        // Title of the item's own page, bound as a real parameter: a placeholder
        // written inside a quoted SQL literal is never substituted.
        $entityTitle = 'Q'.$wikidataId;
        $lang = $page->getProject()->getLang();

        $sql = "SELECT IF(term_type = 'label', 'label', 'description') AS term, term_text
                FROM wikidatawiki_p.wb_entity_per_page
                JOIN wikidatawiki_p.page ON epp_page_id = page_id
                JOIN wikidatawiki_p.wb_terms ON term_entity_id = epp_entity_id
                    AND term_language = :lang
                    AND term_type IN ('label', 'description')
                WHERE epp_entity_id = :wikidataId

                UNION

                SELECT pl_title AS term, wb_terms.term_text
                FROM wikidatawiki_p.pagelinks
                JOIN wikidatawiki_p.wb_terms ON term_entity_id = SUBSTRING(pl_title, 2)
                    AND term_entity_type = (IF(SUBSTRING(pl_title, 1, 1) = 'Q', 'item', 'property'))
                    AND term_language = :lang
                    AND term_type = 'label'
                WHERE pl_namespace IN (0, 120)
                    AND pl_from = (
                        SELECT page_id FROM wikidatawiki_p.page
                        WHERE page_namespace = 0
                            AND page_title = :entityTitle
                    )";

        $resultQuery = $this->getProjectsConnection()->prepare($sql);
        $resultQuery->bindParam(':lang', $lang);
        $resultQuery->bindParam(':wikidataId', $wikidataId);
        $resultQuery->bindParam(':entityTitle', $entityTitle);
        $resultQuery->execute();

        return $resultQuery->fetchAll();
    }

    /**
     * Get or count all wikidata items for the given page,
     *     not just languages of sister projects
     * @param Page $page
     * @param bool $count Set to true to get only a COUNT
     * @return string[]|int Records as returned by the DB,
     *                      or raw COUNT of the records.
     */
    public function getWikidataItems(Page $page, $count = false)
    {
        if (!$page->getWikidataId()) {
            return $count ? 0 : [];
        }

        $wikidataId = ltrim($page->getWikidataId(), 'Q');

        $sql = "SELECT " . ($count ? 'COUNT(*) AS count' : '*') . "
                FROM wikidatawiki_p.wb_items_per_site
                WHERE ips_item_id = :wikidataId";

        $resultQuery = $this->getProjectsConnection()->prepare($sql);
        $resultQuery->bindParam(':wikidataId', $wikidataId);
        $resultQuery->execute();

        $result = $resultQuery->fetchAll();

        // In count mode the single returned row carries the total in its 'count' column.
        return $count ? (int) $result[0]['count'] : $result;
    }

    /**
     * Count wikidata items for the given page, not just languages of sister projects
     * @param Page $page
     * @return int Number of records.
     */
    public function countWikidataItems(Page $page)
    {
        return $this->getWikidataItems($page, true);
    }
}
|
283
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.