1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types = 1); |
4
|
|
|
|
5
|
|
|
namespace App\Repository; |
6
|
|
|
|
7
|
|
|
use App\Model\Edit; |
8
|
|
|
use App\Model\Page; |
9
|
|
|
use Doctrine\DBAL\Driver\ResultStatement; |
10
|
|
|
use GuzzleHttp\Client; |
11
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
12
|
|
|
use Psr\Container\ContainerInterface; |
13
|
|
|
use Psr\Log\LoggerInterface; |
14
|
|
|
|
15
|
|
|
/**
 * ArticleInfoRepository is responsible for retrieving data about a single
 * article on a given wiki.
 * @codeCoverageIgnore
 */
class ArticleInfoRepository extends Repository
{
    protected EditRepository $editRepo;
    protected UserRepository $userRepo;

    /** @var int Maximum number of revisions to process, as configured via app.max_page_revisions */
    protected int $maxPageRevisions;

    /**
     * Stores the Edit and User repositories (used by getEdit()) and forwards
     * the remaining dependencies to the parent Repository constructor.
     * @param ContainerInterface $container
     * @param CacheItemPoolInterface $cache
     * @param Client $guzzle
     * @param LoggerInterface $logger
     * @param bool $isWMF
     * @param int $queryTimeout
     * @param EditRepository $editRepo
     * @param UserRepository $userRepo
     */
    public function __construct(
        ContainerInterface $container,
        CacheItemPoolInterface $cache,
        Client $guzzle,
        LoggerInterface $logger,
        bool $isWMF,
        int $queryTimeout,
        EditRepository $editRepo,
        UserRepository $userRepo
    ) {
        $this->editRepo = $editRepo;
        $this->userRepo = $userRepo;
        parent::__construct($container, $cache, $guzzle, $logger, $isWMF, $queryTimeout);
    }

    /**
     * Get the performance maximum on the number of revisions to process.
     * The value is read from the 'app.max_page_revisions' container parameter
     * on first use and memoized on the instance afterwards.
     * @return int
     */
    public function getMaxPageRevisions(): int
    {
        if (!isset($this->maxPageRevisions)) {
            $this->maxPageRevisions = (int)$this->container->getParameter('app.max_page_revisions');
        }
        return $this->maxPageRevisions;
    }

    /**
     * Factory to instantiate a new Edit for the given revision.
     * @param Page $page
     * @param array $revision
     * @return Edit
     */
    public function getEdit(Page $page, array $revision): Edit
    {
        return new Edit($this->editRepo, $this->userRepo, $page, $revision);
    }

    /**
     * Get the number of edits made to the page by bots or former bots.
     *
     * The query is a UNION of two halves: edits by users found in the former-groups
     * table with group 'bot' (current = '0'), and edits by users found in the groups
     * table with group 'bot' (current = '1').
     * @param Page $page
     * @param false|int $start
     * @param false|int $end
     * @param ?int $limit If given, only this many of the page's most recent revisions are considered.
     * @param bool $count Return a count rather than the full set of rows.
     * @return ResultStatement resolving with keys 'count' and 'current', plus 'username'
     *   (one row per user) when $count is false.
     */
    public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement
    {
        $project = $page->getProject();
        $revTable = $project->getTableName('revision');
        $userGroupsTable = $project->getTableName('user_groups');
        $userFormerGroupsTable = $project->getTableName('user_former_groups');
        $actorTable = $project->getTableName('actor', 'revision');

        $datesConditions = $this->getDateConditions($start, $end);

        if ($count) {
            // A single aggregate row per half of the UNION.
            $actorSelect = '';
            $groupBy = '';
        } else {
            // One row per user per half of the UNION.
            $actorSelect = 'actor_name AS username, ';
            $groupBy = 'GROUP BY actor_user';
        }

        // $limit is type-hinted as ?int, so it is safe to interpolate.
        $limitClause = '';
        if (null !== $limit) {
            $limitClause = "LIMIT $limit";
        }

        $sql = "SELECT COUNT(DISTINCT rev_id) AS count, $actorSelect '0' AS current
                FROM (
                    SELECT rev_id, rev_actor, rev_timestamp
                    FROM $revTable
                    WHERE rev_page = :pageId
                    ORDER BY rev_timestamp DESC
                    $limitClause
                ) a
                JOIN $actorTable ON actor_id = rev_actor
                LEFT JOIN $userFormerGroupsTable ON actor_user = ufg_user
                WHERE ufg_group = 'bot' $datesConditions
                $groupBy
                UNION
                SELECT COUNT(DISTINCT rev_id) AS count, $actorSelect '1' AS current
                FROM (
                    SELECT rev_id, rev_actor, rev_timestamp
                    FROM $revTable
                    WHERE rev_page = :pageId
                    ORDER BY rev_timestamp DESC
                    $limitClause
                ) a
                JOIN $actorTable ON actor_id = rev_actor
                LEFT JOIN $userGroupsTable ON actor_user = ug_user
                WHERE ug_group = 'bot' $datesConditions
                $groupBy";

        return $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()]);
    }

    /**
     * Get prior deletions, page moves, and protections to the page.
     * Results are cached under a key derived from the arguments.
     * @param Page $page
     * @param false|int $start
     * @param false|int $end
     * @return string[] each entry with keys 'log_action', 'log_type' and 'timestamp'.
     */
    public function getLogEvents(Page $page, $start, $end): array
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
        // Fetch the item once and test isHit(): PSR-6 warns that hasItem() followed
        // by getItem()->get() can race, yielding null on an intervening expiry.
        $cacheItem = $this->cache->getItem($cacheKey);
        if ($cacheItem->isHit()) {
            return $cacheItem->get();
        }

        $project = $page->getProject();
        $loggingTable = $project->getTableName('logging', 'logindex');

        $datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp');

        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
                FROM $loggingTable
                WHERE log_namespace = :namespace
                AND log_title = :title AND log_timestamp > 1 $datesConditions
                AND log_type IN ('delete', 'move', 'protect', 'stable')";

        $params = [
            // Bound rather than concatenated into the SQL. The string cast keeps the
            // comparison equivalent to the previously quoted literal.
            'namespace' => (string)$page->getNamespace(),
            // The title is matched with underscores in place of spaces.
            'title' => str_replace(' ', '_', $page->getTitle()),
        ];

        $result = $this->executeProjectsQuery($project, $sql, $params)
            ->fetchAllAssociative();
        return $this->setCache($cacheKey, $result);
    }

    /**
     * Get the number of categories, templates, and files that are on the page.
     * Results are cached under a key derived from the arguments.
     * @param Page $page
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    public function getTransclusionData(Page $page): array
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
        // Single getItem() + isHit() avoids the hasItem()/get() race described in PSR-6.
        $cacheItem = $this->cache->getItem($cacheKey);
        if ($cacheItem->isHit()) {
            return $cacheItem->get();
        }

        $project = $page->getProject();
        $categorylinksTable = $project->getTableName('categorylinks');
        $templatelinksTable = $project->getTableName('templatelinks');
        $imagelinksTable = $project->getTableName('imagelinks');
        $sql = "(
                    SELECT 'categories' AS `key`, COUNT(*) AS val
                    FROM $categorylinksTable
                    WHERE cl_from = :pageId
                ) UNION (
                    SELECT 'templates' AS `key`, COUNT(*) AS val
                    FROM $templatelinksTable
                    WHERE tl_from = :pageId
                ) UNION (
                    SELECT 'files' AS `key`, COUNT(*) AS val
                    FROM $imagelinksTable
                    WHERE il_from = :pageId
                )";
        $resultQuery = $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()]);
        $transclusionCounts = [];

        // Turn the (key, val) rows into a map of key => integer count.
        while ($result = $resultQuery->fetchAssociative()) {
            $transclusionCounts[$result['key']] = (int)$result['val'];
        }

        return $this->setCache($cacheKey, $transclusionCounts);
    }

    /**
     * Get the top editors to the page by edit count.
     * Results are cached under a key derived from the arguments.
     * @param Page $page
     * @param false|int $start
     * @param false|int $end
     * @param int $limit Maximum number of editors to return.
     * @param bool $noBots Whether to exclude users who are currently in the 'bot' user group.
     * @return array Rows with keys 'username', 'count', 'minor', 'first_timestamp', 'first_revid',
     *   'latest_timestamp' and 'latest_revid', ordered by 'count' descending.
     */
    public function getTopEditorsByEditCount(
        Page $page,
        $start = false,
        $end = false,
        int $limit = 20,
        bool $noBots = false
    ): array {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
        // Single getItem() + isHit() avoids the hasItem()/get() race described in PSR-6.
        $cacheItem = $this->cache->getItem($cacheKey);
        if ($cacheItem->isHit()) {
            return $cacheItem->get();
        }

        $project = $page->getProject();
        // Faster to use revision instead of revision_userindex in this case.
        $revTable = $project->getTableName('revision', '');
        $actorTable = $project->getTableName('actor');

        $dateConditions = $this->getDateConditions($start, $end);

        $sql = "SELECT actor_name AS username,
                    COUNT(rev_id) AS count,
                    SUM(rev_minor_edit) AS minor,
                    MIN(rev_timestamp) AS first_timestamp,
                    MIN(rev_id) AS first_revid,
                    MAX(rev_timestamp) AS latest_timestamp,
                    MAX(rev_id) AS latest_revid
                FROM $revTable
                JOIN $actorTable ON rev_actor = actor_id
                WHERE rev_page = :pageId $dateConditions";

        // Each appended fragment starts on its own line so that the statement stays
        // well-formed regardless of how the preceding fragment ends.
        if ($noBots) {
            $userGroupsTable = $project->getTableName('user_groups');
            $sql .= "
                AND NOT EXISTS (
                    SELECT 1
                    FROM $userGroupsTable
                    WHERE ug_user = actor_user
                    AND ug_group = 'bot'
                )";
        }

        // $limit is type-hinted as int, so it is safe to interpolate.
        $sql .= "
                GROUP BY actor_id
                ORDER BY count DESC
                LIMIT $limit";

        $result = $this->executeProjectsQuery($project, $sql, [
            'pageId' => $page->getId(),
        ])->fetchAllAssociative();

        return $this->setCache($cacheKey, $result);
    }

    /**
     * Get various basic info used in the API, including the number of revisions, unique authors, initial author
     * and edit count of the initial author. This is combined into one query for better performance. Caching is only
     * applied if it took considerable time to process, because using the gadget, this will get hit for a different page
     * constantly, where the likelihood of cache benefiting us is slim.
     * @param Page $page The page.
     * @return string[]|false false if the page was not found.
     */
    public function getBasicEditingInfo(Page $page)
    {
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
        // Single getItem() + isHit() avoids the hasItem()/get() race described in PSR-6.
        $cacheItem = $this->cache->getItem($cacheKey);
        if ($cacheItem->isHit()) {
            return $cacheItem->get();
        }

        $project = $page->getProject();
        $revTable = $project->getTableName('revision');
        $userTable = $project->getTableName('user');
        $pageTable = $project->getTableName('page');
        $actorTable = $project->getTableName('actor');

        $sql = "SELECT *, (
                    SELECT user_editcount
                    FROM $userTable
                    WHERE user_id = author_user_id
                ) AS author_editcount
                FROM (
                    (
                        SELECT COUNT(rev_id) AS num_edits,
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
                            SUM(rev_minor_edit) AS minor_edits
                        FROM $revTable
                        WHERE rev_page = :pageid
                        AND rev_timestamp > 0 # Use rev_timestamp index
                    ) a,
                    (
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
                        # and the lowest rev_id may not be the first revision.
                        SELECT actor_name AS author,
                            actor_user AS author_user_id,
                            rev_timestamp AS created_at,
                            rev_id AS created_rev_id
                        FROM $revTable
                        JOIN $actorTable ON actor_id = rev_actor
                        WHERE rev_page = :pageid
                        ORDER BY rev_timestamp ASC
                        LIMIT 1
                    ) b,
                    (
                        SELECT rev_timestamp AS modified_at,
                            rev_id AS modified_rev_id
                        FROM $revTable
                        JOIN $pageTable ON page_id = rev_page
                        WHERE rev_page = :pageid
                        AND rev_id = page_latest
                    ) c
                )";
        $params = ['pageid' => $page->getId()];

        // Record the start time so we can measure how long the query took
        // and decide whether or not to cache the result. hrtime() is used
        // because it is monotonic and has sub-second resolution.
        $startedAt = hrtime(true);

        /*
         * This query can sometimes take too long to run for pages with tens of thousands
         * of revisions. This query is used by the ArticleInfo gadget, which shows basic
         * data in real-time, so if it takes too long then the user probably didn't even
         * wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery,
         * which will set the max_statement_time to 60 seconds.
         */
        $result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative();

        // hrtime(true) reports nanoseconds.
        $elapsedSeconds = (hrtime(true) - $startedAt) / 1e9;

        // If it took over 5 seconds, cache the result for 20 minutes.
        if ($elapsedSeconds > 5) {
            $this->setCache($cacheKey, $result, 'PT20M');
        }

        return $result ?? false;
    }
}
345
|
|
|
|