1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types = 1); |
4
|
|
|
|
5
|
|
|
namespace App\Repository; |
6
|
|
|
|
7
|
|
|
use App\Helper\AutomatedEditsHelper; |
8
|
|
|
use App\Model\Edit; |
9
|
|
|
use App\Model\Page; |
10
|
|
|
use Doctrine\DBAL\Driver\ResultStatement; |
11
|
|
|
use Doctrine\Persistence\ManagerRegistry; |
12
|
|
|
use GuzzleHttp\Client; |
13
|
|
|
use PDO; |
14
|
|
|
use Psr\Cache\CacheItemPoolInterface; |
15
|
|
|
use Psr\Log\LoggerInterface; |
16
|
|
|
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface; |
17
|
|
|
use Symfony\Component\HttpFoundation\RequestStack; |
18
|
|
|
|
19
|
|
|
/** |
20
|
|
|
* PageInfoRepository is responsible for retrieving data about a single page on a given wiki. |
21
|
|
|
* @codeCoverageIgnore |
22
|
|
|
*/ |
23
|
|
|
class PageInfoRepository extends AutoEditsRepository |
24
|
|
|
{ |
25
|
|
|
protected EditRepository $editRepo; |
26
|
|
|
protected UserRepository $userRepo; |
27
|
|
|
|
28
|
|
|
/** @var int Maximum number of revisions to process, as configured via APP_MAX_PAGE_REVISIONS */ |
29
|
|
|
protected int $maxPageRevisions; |
30
|
|
|
|
31
|
|
|
/** |
32
|
|
|
* @param ManagerRegistry $managerRegistry |
33
|
|
|
* @param CacheItemPoolInterface $cache |
34
|
|
|
* @param Client $guzzle |
35
|
|
|
* @param LoggerInterface $logger |
36
|
|
|
* @param ParameterBagInterface $parameterBag |
37
|
|
|
* @param bool $isWMF |
38
|
|
|
* @param int $queryTimeout |
39
|
|
|
* @param EditRepository $editRepo |
40
|
|
|
* @param UserRepository $userRepo |
41
|
|
|
*/ |
42
|
|
|
public function __construct( |
43
|
|
|
ManagerRegistry $managerRegistry, |
44
|
|
|
CacheItemPoolInterface $cache, |
45
|
|
|
Client $guzzle, |
46
|
|
|
LoggerInterface $logger, |
47
|
|
|
ParameterBagInterface $parameterBag, |
48
|
|
|
bool $isWMF, |
49
|
|
|
int $queryTimeout, |
50
|
|
|
EditRepository $editRepo, |
51
|
|
|
UserRepository $userRepo, |
52
|
|
|
ProjectRepository $projectRepo, |
53
|
|
|
AutomatedEditsHelper $autoEditsHelper, |
54
|
|
|
RequestStack $requestStack |
55
|
|
|
) { |
56
|
|
|
$this->editRepo = $editRepo; |
57
|
|
|
$this->userRepo = $userRepo; |
58
|
|
|
parent::__construct( |
59
|
|
|
$managerRegistry, |
60
|
|
|
$cache, |
61
|
|
|
$guzzle, |
62
|
|
|
$logger, |
63
|
|
|
$parameterBag, |
64
|
|
|
$isWMF, |
65
|
|
|
$queryTimeout, |
66
|
|
|
$projectRepo, |
67
|
|
|
$autoEditsHelper, |
68
|
|
|
$requestStack |
69
|
|
|
); |
70
|
|
|
} |
71
|
|
|
|
72
|
|
|
/** |
73
|
|
|
* Get the performance maximum on the number of revisions to process. |
74
|
|
|
* @return int |
75
|
|
|
*/ |
76
|
|
|
public function getMaxPageRevisions(): int |
77
|
|
|
{ |
78
|
|
|
if (!isset($this->maxPageRevisions)) { |
79
|
|
|
$this->maxPageRevisions = (int)$this->parameterBag->get('app.max_page_revisions'); |
80
|
|
|
} |
81
|
|
|
return $this->maxPageRevisions; |
82
|
|
|
} |
83
|
|
|
|
84
|
|
|
/** |
85
|
|
|
* Factory to instantiate a new Edit for the given revision. |
86
|
|
|
* @param Page $page |
87
|
|
|
* @param array $revision |
88
|
|
|
* @return Edit |
89
|
|
|
*/ |
90
|
|
|
public function getEdit(Page $page, array $revision): Edit |
91
|
|
|
{ |
92
|
|
|
return new Edit($this->editRepo, $this->userRepo, $page, $revision); |
93
|
|
|
} |
94
|
|
|
|
95
|
|
|
/** |
96
|
|
|
* Get the number of edits made to the page by bots or former bots. |
97
|
|
|
* @param Page $page |
98
|
|
|
* @param false|int $start |
99
|
|
|
* @param false|int $end |
100
|
|
|
* @param ?int $limit |
101
|
|
|
* @param bool $count Return a count rather than the full set of rows. |
102
|
|
|
* @return ResultStatement resolving with keys 'count', 'username' and 'current'. |
103
|
|
|
*/ |
104
|
|
|
public function getBotData(Page $page, $start, $end, ?int $limit, bool $count = false): ResultStatement |
105
|
|
|
{ |
106
|
|
|
$project = $page->getProject(); |
107
|
|
|
$revTable = $project->getTableName('revision'); |
108
|
|
|
$userGroupsTable = $project->getTableName('user_groups'); |
109
|
|
|
$userFormerGroupsTable = $project->getTableName('user_former_groups'); |
110
|
|
|
$actorTable = $project->getTableName('actor', 'revision'); |
111
|
|
|
|
112
|
|
|
$datesConditions = $this->getDateConditions($start, $end); |
113
|
|
|
|
114
|
|
|
if ($count) { |
115
|
|
|
$actorSelect = ''; |
116
|
|
|
$groupBy = ''; |
117
|
|
|
} else { |
118
|
|
|
$actorSelect = 'actor_name AS username, '; |
119
|
|
|
$groupBy = 'GROUP BY actor_user'; |
120
|
|
|
} |
121
|
|
|
|
122
|
|
|
$limitClause = ''; |
123
|
|
|
if (null !== $limit) { |
124
|
|
|
$limitClause = "LIMIT $limit"; |
125
|
|
|
} |
126
|
|
|
|
127
|
|
|
$sql = "SELECT COUNT(DISTINCT rev_id) AS `count`, $actorSelect '0' AS `current` |
128
|
|
|
FROM ( |
129
|
|
|
SELECT rev_id, rev_actor, rev_timestamp |
130
|
|
|
FROM $revTable |
131
|
|
|
WHERE rev_page = :pageId |
132
|
|
|
ORDER BY rev_timestamp DESC |
133
|
|
|
$limitClause |
134
|
|
|
) a |
135
|
|
|
JOIN $actorTable ON actor_id = rev_actor |
136
|
|
|
LEFT JOIN $userFormerGroupsTable ON actor_user = ufg_user |
137
|
|
|
WHERE ufg_group = 'bot' $datesConditions |
138
|
|
|
$groupBy |
139
|
|
|
UNION |
140
|
|
|
SELECT COUNT(DISTINCT rev_id) AS count, $actorSelect '1' AS current |
141
|
|
|
FROM ( |
142
|
|
|
SELECT rev_id, rev_actor, rev_timestamp |
143
|
|
|
FROM $revTable |
144
|
|
|
WHERE rev_page = :pageId |
145
|
|
|
ORDER BY rev_timestamp DESC |
146
|
|
|
$limitClause |
147
|
|
|
) a |
148
|
|
|
JOIN $actorTable ON actor_id = rev_actor |
149
|
|
|
LEFT JOIN $userGroupsTable ON actor_user = ug_user |
150
|
|
|
WHERE ug_group = 'bot' $datesConditions |
151
|
|
|
$groupBy"; |
152
|
|
|
|
153
|
|
|
return $this->executeProjectsQuery($project, $sql, ['pageId' => $page->getId()]); |
154
|
|
|
} |
155
|
|
|
|
156
|
|
|
/** |
157
|
|
|
* Get prior deletions, page moves, and protections to the page. |
158
|
|
|
* @param Page $page |
159
|
|
|
* @param false|int $start |
160
|
|
|
* @param false|int $end |
161
|
|
|
* @return string[] each entry with keys 'log_action', 'log_type' and 'timestamp'. |
162
|
|
|
*/ |
163
|
|
|
public function getLogEvents(Page $page, $start, $end): array |
164
|
|
|
{ |
165
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents'); |
166
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
167
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
168
|
|
|
} |
169
|
|
|
$loggingTable = $page->getProject()->getTableName('logging', 'logindex'); |
170
|
|
|
|
171
|
|
|
$datesConditions = $this->getDateConditions($start, $end, false, '', 'log_timestamp'); |
172
|
|
|
|
173
|
|
|
$sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp' |
174
|
|
|
FROM $loggingTable |
175
|
|
|
WHERE log_namespace = '" . $page->getNamespace() . "' |
176
|
|
|
AND log_title = :title AND log_timestamp > 1 $datesConditions |
177
|
|
|
AND log_type IN ('delete', 'move', 'protect', 'stable')"; |
178
|
|
|
$title = str_replace(' ', '_', $page->getTitle()); |
179
|
|
|
|
180
|
|
|
$result = $this->executeProjectsQuery($page->getProject(), $sql, ['title' => $title]) |
181
|
|
|
->fetchAllAssociative(); |
182
|
|
|
return $this->setCache($cacheKey, $result); |
183
|
|
|
} |
184
|
|
|
|
185
|
|
|
/** |
186
|
|
|
* Get the number of categories, templates, and files that are on the page. |
187
|
|
|
* @param Page $page |
188
|
|
|
* @return array With keys 'categories', 'templates' and 'files'. |
189
|
|
|
*/ |
190
|
|
|
public function getTransclusionData(Page $page): array |
191
|
|
|
{ |
192
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions'); |
193
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
194
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
195
|
|
|
} |
196
|
|
|
|
197
|
|
|
$categorylinksTable = $page->getProject()->getTableName('categorylinks'); |
198
|
|
|
$templatelinksTable = $page->getProject()->getTableName('templatelinks'); |
199
|
|
|
$imagelinksTable = $page->getProject()->getTableName('imagelinks'); |
200
|
|
|
$sql = "( |
201
|
|
|
SELECT 'categories' AS `key`, COUNT(*) AS val |
202
|
|
|
FROM $categorylinksTable |
203
|
|
|
WHERE cl_from = :pageId |
204
|
|
|
) UNION ( |
205
|
|
|
SELECT 'templates' AS `key`, COUNT(*) AS val |
206
|
|
|
FROM $templatelinksTable |
207
|
|
|
WHERE tl_from = :pageId |
208
|
|
|
) UNION ( |
209
|
|
|
SELECT 'files' AS `key`, COUNT(*) AS val |
210
|
|
|
FROM $imagelinksTable |
211
|
|
|
WHERE il_from = :pageId |
212
|
|
|
)"; |
213
|
|
|
$resultQuery = $this->executeProjectsQuery($page->getProject(), $sql, ['pageId' => $page->getId()]); |
214
|
|
|
$transclusionCounts = []; |
215
|
|
|
|
216
|
|
|
while ($result = $resultQuery->fetchAssociative()) { |
217
|
|
|
$transclusionCounts[$result['key']] = (int)$result['val']; |
218
|
|
|
} |
219
|
|
|
|
220
|
|
|
return $this->setCache($cacheKey, $transclusionCounts); |
221
|
|
|
} |
222
|
|
|
|
223
|
|
|
/** |
224
|
|
|
* Get the top editors to the page by edit count. |
225
|
|
|
* @param Page $page |
226
|
|
|
* @param false|int $start |
227
|
|
|
* @param false|int $end |
228
|
|
|
* @param int $limit |
229
|
|
|
* @param bool $noBots |
230
|
|
|
* @return array |
231
|
|
|
*/ |
232
|
|
|
public function getTopEditorsByEditCount( |
233
|
|
|
Page $page, |
234
|
|
|
$start = false, |
235
|
|
|
$end = false, |
236
|
|
|
int $limit = 20, |
237
|
|
|
bool $noBots = false |
238
|
|
|
): array { |
239
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors'); |
240
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
241
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
242
|
|
|
} |
243
|
|
|
|
244
|
|
|
$project = $page->getProject(); |
245
|
|
|
// Faster to use revision instead of revision_userindex in this case. |
246
|
|
|
$revTable = $project->getTableName('revision', ''); |
247
|
|
|
$actorTable = $project->getTableName('actor'); |
248
|
|
|
|
249
|
|
|
$dateConditions = $this->getDateConditions($start, $end); |
250
|
|
|
|
251
|
|
|
$sql = "SELECT actor_name AS username, |
252
|
|
|
COUNT(rev_id) AS count, |
253
|
|
|
SUM(rev_minor_edit) AS minor, |
254
|
|
|
MIN(rev_timestamp) AS first_timestamp, |
255
|
|
|
MIN(rev_id) AS first_revid, |
256
|
|
|
MAX(rev_timestamp) AS latest_timestamp, |
257
|
|
|
MAX(rev_id) AS latest_revid |
258
|
|
|
FROM $revTable |
259
|
|
|
JOIN $actorTable ON rev_actor = actor_id |
260
|
|
|
WHERE rev_page = :pageId $dateConditions"; |
261
|
|
|
|
262
|
|
|
if ($noBots) { |
263
|
|
|
$userGroupsTable = $project->getTableName('user_groups'); |
264
|
|
|
$sql .= "AND NOT EXISTS ( |
265
|
|
|
SELECT 1 |
266
|
|
|
FROM $userGroupsTable |
267
|
|
|
WHERE ug_user = actor_user |
268
|
|
|
AND ug_group = 'bot' |
269
|
|
|
)"; |
270
|
|
|
} |
271
|
|
|
|
272
|
|
|
$sql .= "GROUP BY actor_id |
273
|
|
|
ORDER BY count DESC |
274
|
|
|
LIMIT $limit"; |
275
|
|
|
|
276
|
|
|
$result = $this->executeProjectsQuery($project, $sql, [ |
277
|
|
|
'pageId' => $page->getId(), |
278
|
|
|
])->fetchAllAssociative(); |
279
|
|
|
|
280
|
|
|
return $this->setCache($cacheKey, $result); |
281
|
|
|
} |
282
|
|
|
|
283
|
|
|
/** |
284
|
|
|
* Get various basic info used in the API, including the number of revisions, unique authors, initial author |
285
|
|
|
* and edit count of the initial author. This is combined into one query for better performance. Caching is only |
286
|
|
|
* applied if it took considerable time to process, because using the gadget, this will get hit for a different page |
287
|
|
|
* constantly, where the likelihood of cache benefiting us is slim. |
288
|
|
|
* @param Page $page The page. |
289
|
|
|
* @return string[]|false false if the page was not found. |
290
|
|
|
*/ |
291
|
|
|
public function getBasicEditingInfo(Page $page) |
292
|
|
|
{ |
293
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo'); |
294
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
295
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
296
|
|
|
} |
297
|
|
|
|
298
|
|
|
$project = $page->getProject(); |
299
|
|
|
$revTable = $project->getTableName('revision'); |
300
|
|
|
$userTable = $project->getTableName('user'); |
301
|
|
|
$pageTable = $project->getTableName('page'); |
302
|
|
|
$actorTable = $project->getTableName('actor'); |
303
|
|
|
|
304
|
|
|
$sql = "SELECT *, ( |
305
|
|
|
SELECT user_editcount |
306
|
|
|
FROM $userTable |
307
|
|
|
WHERE user_id = author_user_id |
308
|
|
|
) AS author_editcount |
309
|
|
|
FROM ( |
310
|
|
|
( |
311
|
|
|
SELECT COUNT(rev_id) AS num_edits, |
312
|
|
|
COUNT(DISTINCT(rev_actor)) AS num_editors, |
313
|
|
|
SUM(actor_user IS NULL) AS ip_edits, |
314
|
|
|
SUM(rev_minor_edit) AS minor_edits |
315
|
|
|
FROM $revTable |
316
|
|
|
JOIN $actorTable ON actor_id = rev_actor |
317
|
|
|
WHERE rev_page = :pageid |
318
|
|
|
AND rev_timestamp > 0 # Use rev_timestamp index |
319
|
|
|
) a, |
320
|
|
|
( |
321
|
|
|
# With really old pages, the rev_timestamp may need to be sorted ASC, |
322
|
|
|
# and the lowest rev_id may not be the first revision. |
323
|
|
|
SELECT actor_name AS author, |
324
|
|
|
actor_user AS author_user_id, |
325
|
|
|
rev_timestamp AS created_at, |
326
|
|
|
rev_id AS created_rev_id |
327
|
|
|
FROM $revTable |
328
|
|
|
JOIN $actorTable ON actor_id = rev_actor |
329
|
|
|
WHERE rev_page = :pageid |
330
|
|
|
ORDER BY rev_timestamp ASC |
331
|
|
|
LIMIT 1 |
332
|
|
|
) b, |
333
|
|
|
( |
334
|
|
|
SELECT rev_timestamp AS modified_at, |
335
|
|
|
rev_id AS modified_rev_id |
336
|
|
|
FROM $revTable |
337
|
|
|
JOIN $pageTable ON page_id = rev_page |
338
|
|
|
WHERE rev_page = :pageid |
339
|
|
|
AND rev_id = page_latest |
340
|
|
|
) c |
341
|
|
|
)"; |
342
|
|
|
$params = ['pageid' => $page->getId()]; |
343
|
|
|
|
344
|
|
|
// Get current time so we can compare timestamps |
345
|
|
|
// and decide whether or to cache the result. |
346
|
|
|
$time1 = time(); |
347
|
|
|
|
348
|
|
|
/** |
349
|
|
|
* This query can sometimes take too long to run for pages with tens of thousands |
350
|
|
|
* of revisions. This query is used by the PageInfo gadget, which shows basic |
351
|
|
|
* data in real-time, so if it takes too long than the user probably didn't even |
352
|
|
|
* wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery, |
353
|
|
|
* which will set the max_statement_time to 60 seconds. |
354
|
|
|
*/ |
355
|
|
|
$result = $this->executeProjectsQuery($project, $sql, $params, 60)->fetchAssociative(); |
356
|
|
|
|
357
|
|
|
$time2 = time(); |
358
|
|
|
|
359
|
|
|
// If it took over 5 seconds, cache the result for 20 minutes. |
360
|
|
|
if ($time2 - $time1 > 5) { |
361
|
|
|
$this->setCache($cacheKey, $result, 'PT20M'); |
362
|
|
|
} |
363
|
|
|
|
364
|
|
|
return $result ?? false; |
365
|
|
|
} |
366
|
|
|
|
367
|
|
|
/** |
368
|
|
|
* Get counts of (semi-)automated tools that were used to edit the page. |
369
|
|
|
* @param Page $page |
370
|
|
|
* @param $start |
371
|
|
|
* @param $end |
372
|
|
|
* @return array |
373
|
|
|
*/ |
374
|
|
|
public function getAutoEditsCounts(Page $page, $start, $end): array |
375
|
|
|
{ |
376
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount'); |
377
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
378
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
379
|
|
|
} |
380
|
|
|
|
381
|
|
|
$project = $page->getProject(); |
382
|
|
|
$tools = $this->getTools($project); |
383
|
|
|
$queries = []; |
384
|
|
|
$revisionTable = $project->getTableName('revision', ''); |
385
|
|
|
$pageTable = $project->getTableName('page'); |
386
|
|
|
$pageJoin = "LEFT JOIN $pageTable ON rev_page = page_id"; |
387
|
|
|
$revDateConditions = $this->getDateConditions($start, $end); |
388
|
|
|
$conn = $this->getProjectsConnection($project); |
389
|
|
|
|
390
|
|
|
foreach ($tools as $toolName => $values) { |
391
|
|
|
[$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values); |
392
|
|
|
$toolName = $conn->quote($toolName, PDO::PARAM_STR); |
393
|
|
|
|
394
|
|
|
// No regex or tag provided for this tool. This can happen for tag-only tools that are in the global |
395
|
|
|
// configuration, but no local tag exists on the said project. |
396
|
|
|
if ('' === $condTool) { |
397
|
|
|
continue; |
398
|
|
|
} |
399
|
|
|
|
400
|
|
|
$queries[] .= " |
401
|
|
|
SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count |
402
|
|
|
FROM $revisionTable |
403
|
|
|
$pageJoin |
404
|
|
|
$commentJoin |
405
|
|
|
$tagJoin |
406
|
|
|
WHERE $condTool |
407
|
|
|
AND rev_page = :pageId |
408
|
|
|
$revDateConditions"; |
409
|
|
|
} |
410
|
|
|
|
411
|
|
|
$sql = implode(' UNION ', $queries); |
412
|
|
|
$resultQuery = $this->executeProjectsQuery($project, $sql, [ |
413
|
|
|
'pageId' => $page->getId(), |
414
|
|
|
]); |
415
|
|
|
|
416
|
|
|
$results = []; |
417
|
|
|
|
418
|
|
|
while ($row = $resultQuery->fetchAssociative()) { |
419
|
|
|
// Only track tools that they've used at least once |
420
|
|
|
$tool = $row['toolname']; |
421
|
|
|
if ($row['count'] > 0) { |
422
|
|
|
$results[$tool] = [ |
423
|
|
|
'link' => $tools[$tool]['link'], |
424
|
|
|
'label' => $tools[$tool]['label'] ?? $tool, |
425
|
|
|
'count' => $row['count'], |
426
|
|
|
]; |
427
|
|
|
} |
428
|
|
|
} |
429
|
|
|
|
430
|
|
|
// Sort the array by count |
431
|
|
|
uasort($results, function ($a, $b) { |
432
|
|
|
return $b['count'] - $a['count']; |
433
|
|
|
}); |
434
|
|
|
|
435
|
|
|
return $this->setCache($cacheKey, $results); |
436
|
|
|
} |
437
|
|
|
} |
438
|
|
|
|