1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types = 1); |
4
|
|
|
|
5
|
|
|
namespace App\Repository; |
6
|
|
|
|
7
|
|
|
use App\Model\Pages; |
8
|
|
|
use App\Model\Project; |
9
|
|
|
use App\Model\User; |
10
|
|
|
|
11
|
|
|
/** |
12
|
|
|
* An PagesRepository is responsible for retrieving information from the |
13
|
|
|
* databases for the Pages Created tool. It does not do any post-processing |
14
|
|
|
* of that data. |
15
|
|
|
* @codeCoverageIgnore |
16
|
|
|
*/ |
17
|
|
|
class PagesRepository extends UserRepository |
18
|
|
|
{ |
19
|
|
|
/** |
20
|
|
|
* Count the number of pages created by a user. |
21
|
|
|
* @param Project $project |
22
|
|
|
* @param User $user |
23
|
|
|
* @param string|int $namespace Namespace ID or 'all'. |
24
|
|
|
* @param string $redirects One of the Pages::REDIR_ constants. |
25
|
|
|
* @param string $deleted One of the Pages::DEL_ constants. |
26
|
|
|
* @param int|false $start Start date as Unix timestamp. |
27
|
|
|
* @param int|false $end End date as Unix timestamp. |
28
|
|
|
* @return string[] Result of query, see below. Includes live and deleted pages. |
29
|
|
|
*/ |
30
|
|
|
public function countPagesCreated( |
31
|
|
|
Project $project, |
32
|
|
|
User $user, |
33
|
|
|
$namespace, |
34
|
|
|
string $redirects, |
35
|
|
|
string $deleted, |
36
|
|
|
$start = false, |
37
|
|
|
$end = false |
38
|
|
|
): array { |
39
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'num_user_pages_created'); |
40
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
41
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
42
|
|
|
} |
43
|
|
|
|
44
|
|
|
$conditions = [ |
45
|
|
|
'paSelects' => '', |
46
|
|
|
'paSelectsArchive' => '', |
47
|
|
|
'paJoin' => '', |
48
|
|
|
'revPageGroupBy' => '', |
49
|
|
|
]; |
50
|
|
|
$conditions = array_merge( |
51
|
|
|
$conditions, |
52
|
|
|
$this->getNamespaceRedirectAndDeletedPagesConditions($namespace, $redirects), |
53
|
|
|
$this->getUserConditions('' !== $start.$end) |
54
|
|
|
); |
55
|
|
|
|
56
|
|
|
$wasRedirect = $this->getWasRedirectClause($redirects, $deleted); |
57
|
|
|
$summation = Pages::DEL_NONE !== $deleted ? 'redirect OR was_redirect' : 'redirect'; |
58
|
|
|
|
59
|
|
|
$sql = "SELECT `namespace`, |
60
|
|
|
COUNT(page_title) AS `count`, |
61
|
|
|
SUM(IF(type = 'arc', 1, 0)) AS `deleted`, |
62
|
|
|
SUM($summation) AS `redirects`, |
63
|
|
|
SUM(rev_length) AS `total_length` |
64
|
|
|
FROM (" . |
65
|
|
|
$this->getPagesCreatedInnerSql($project, $conditions, $deleted, $start, $end, false, true)." |
66
|
|
|
) a ". |
67
|
|
|
$wasRedirect . |
68
|
|
|
"GROUP BY `namespace`"; |
69
|
|
|
|
70
|
|
|
$result = $this->executeQuery($sql, $project, $user, $namespace) |
71
|
|
|
->fetchAllAssociative(); |
72
|
|
|
|
73
|
|
|
// Cache and return. |
74
|
|
|
return $this->setCache($cacheKey, $result); |
75
|
|
|
} |
76
|
|
|
|
77
|
|
|
/** |
78
|
|
|
* Get pages created by a user. |
79
|
|
|
* @param Project $project |
80
|
|
|
* @param User $user |
81
|
|
|
* @param string|int $namespace Namespace ID or 'all'. |
82
|
|
|
* @param string $redirects One of the Pages::REDIR_ constants. |
83
|
|
|
* @param string $deleted One of the Pages::DEL_ constants. |
84
|
|
|
* @param int|false $start Start date as Unix timestamp. |
85
|
|
|
* @param int|false $end End date as Unix timestamp. |
86
|
|
|
* @param int|null $limit Number of results to return, or blank to return all. |
87
|
|
|
* @param false|int $offset Unix timestamp. Used for pagination. |
88
|
|
|
* @return string[] Result of query, see below. Includes live and deleted pages. |
89
|
|
|
*/ |
90
|
|
|
public function getPagesCreated( |
91
|
|
|
Project $project, |
92
|
|
|
User $user, |
93
|
|
|
$namespace, |
94
|
|
|
string $redirects, |
95
|
|
|
string $deleted, |
96
|
|
|
$start = false, |
97
|
|
|
$end = false, |
98
|
|
|
?int $limit = 1000, |
99
|
|
|
$offset = false |
100
|
|
|
): array { |
101
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'user_pages_created'); |
102
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
103
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
104
|
|
|
} |
105
|
|
|
|
106
|
|
|
$conditions = [ |
107
|
|
|
'paSelects' => '', |
108
|
|
|
'paSelectsArchive' => '', |
109
|
|
|
'paJoin' => '', |
110
|
|
|
'revPageGroupBy' => '', |
111
|
|
|
]; |
112
|
|
|
|
113
|
|
|
$conditions = array_merge( |
114
|
|
|
$conditions, |
115
|
|
|
$this->getNamespaceRedirectAndDeletedPagesConditions($namespace, $redirects), |
116
|
|
|
$this->getUserConditions('' !== $start.$end) |
117
|
|
|
); |
118
|
|
|
|
119
|
|
|
$hasPageAssessments = $this->isWMF && $project->hasPageAssessments(); |
120
|
|
|
if ($hasPageAssessments) { |
121
|
|
|
$pageAssessmentsTable = $project->getTableName('page_assessments'); |
122
|
|
|
$conditions['paSelects'] = ', pa_class'; |
123
|
|
|
$conditions['paSelectsArchive'] = ', NULL AS pa_class'; |
124
|
|
|
$conditions['paJoin'] = "LEFT JOIN $pageAssessmentsTable ON rev_page = pa_page_id"; |
125
|
|
|
$conditions['revPageGroupBy'] = 'GROUP BY rev_page'; |
126
|
|
|
} |
127
|
|
|
|
128
|
|
|
$wasRedirect = $this->getWasRedirectClause($redirects, $deleted); |
129
|
|
|
|
130
|
|
|
$sql = "SELECT * FROM (". |
131
|
|
|
$this->getPagesCreatedInnerSql($project, $conditions, $deleted, $start, $end, $offset)." |
132
|
|
|
) a ". |
133
|
|
|
$wasRedirect . |
134
|
|
|
"ORDER BY `timestamp` DESC |
135
|
|
|
".(!empty($limit) ? "LIMIT $limit" : ''); |
136
|
|
|
|
137
|
|
|
$result = $this->executeQuery($sql, $project, $user, $namespace) |
138
|
|
|
->fetchAllAssociative(); |
139
|
|
|
|
140
|
|
|
// Cache and return. |
141
|
|
|
return $this->setCache($cacheKey, $result); |
142
|
|
|
} |
143
|
|
|
|
144
|
|
|
private function getWasRedirectClause(string $redirects, string $deleted): string |
145
|
|
|
{ |
146
|
|
|
if (Pages::REDIR_NONE === $redirects) { |
147
|
|
|
return "WHERE was_redirect IS NULL "; |
148
|
|
|
} elseif (Pages::REDIR_ONLY === $redirects && Pages::DEL_ONLY === $deleted) { |
149
|
|
|
return "WHERE was_redirect = 1 "; |
150
|
|
|
} elseif (Pages::REDIR_ONLY === $redirects && Pages::DEL_ALL === $deleted) { |
151
|
|
|
return "WHERE was_redirect = 1 OR redirect = 1 "; |
152
|
|
|
} |
153
|
|
|
return ''; |
154
|
|
|
} |
155
|
|
|
|
156
|
|
|
/** |
157
|
|
|
* Get SQL fragments for the namespace and redirects, |
158
|
|
|
* to be used in self::getPagesCreatedInnerSql(). |
159
|
|
|
* @param string|int $namespace Namespace ID or 'all'. |
160
|
|
|
* @param string $redirects One of the Pages::REDIR_ constants. |
161
|
|
|
* @return string[] With keys 'namespaceRev', 'namespaceArc' and 'redirects' |
162
|
|
|
*/ |
163
|
|
|
private function getNamespaceRedirectAndDeletedPagesConditions($namespace, string $redirects): array |
164
|
|
|
{ |
165
|
|
|
$conditions = [ |
166
|
|
|
'namespaceArc' => '', |
167
|
|
|
'namespaceRev' => '', |
168
|
|
|
'redirects' => '', |
169
|
|
|
]; |
170
|
|
|
|
171
|
|
|
if ('all' !== $namespace) { |
172
|
|
|
$conditions['namespaceRev'] = " AND page_namespace = '".intval($namespace)."' "; |
173
|
|
|
$conditions['namespaceArc'] = " AND ar_namespace = '".intval($namespace)."' "; |
174
|
|
|
} |
175
|
|
|
|
176
|
|
|
if (Pages::REDIR_ONLY == $redirects) { |
177
|
|
|
$conditions['redirects'] = " AND page_is_redirect = '1' "; |
178
|
|
|
} elseif (Pages::REDIR_NONE == $redirects) { |
179
|
|
|
$conditions['redirects'] = " AND page_is_redirect = '0' "; |
180
|
|
|
} |
181
|
|
|
|
182
|
|
|
return $conditions; |
183
|
|
|
} |
184
|
|
|
|
185
|
|
|
/** |
186
|
|
|
* Inner SQL for getting or counting pages created by the user. |
187
|
|
|
* @param Project $project |
188
|
|
|
* @param string[] $conditions Conditions for the SQL, must include 'paSelects', |
189
|
|
|
* 'paSelectsArchive', 'paJoin', 'whereRev', 'whereArc', 'namespaceRev', 'namespaceArc', |
190
|
|
|
* 'redirects' and 'revPageGroupBy'. |
191
|
|
|
* @param string $deleted One of the Pages::DEL_ constants. |
192
|
|
|
* @param int|false $start Start date as Unix timestamp. |
193
|
|
|
* @param int|false $end End date as Unix timestamp. |
194
|
|
|
* @param int|false $offset Unix timestamp, used for pagination. |
195
|
|
|
* @param bool $count Omit unneeded columns from the SELECT clause. |
196
|
|
|
* @return string Raw SQL. |
197
|
|
|
*/ |
198
|
|
|
private function getPagesCreatedInnerSql( |
199
|
|
|
Project $project, |
200
|
|
|
array $conditions, |
201
|
|
|
string $deleted, |
202
|
|
|
$start, |
203
|
|
|
$end, |
204
|
|
|
$offset = false, |
205
|
|
|
bool $count = false |
206
|
|
|
): string { |
207
|
|
|
$pageTable = $project->getTableName('page'); |
208
|
|
|
$revisionTable = $project->getTableName('revision'); |
209
|
|
|
$archiveTable = $project->getTableName('archive'); |
210
|
|
|
$logTable = $project->getTableName('logging', 'logindex'); |
211
|
|
|
|
212
|
|
|
// Only SELECT things that are needed, based on whether or not we're doing a COUNT. |
213
|
|
|
$revSelects = "DISTINCT page_namespace AS `namespace`, 'rev' AS `type`, page_title, " |
214
|
|
|
. "page_is_redirect AS `redirect`, rev_len AS `rev_length`"; |
215
|
|
|
if (!$count) { |
216
|
|
|
$revSelects .= ", page_len AS `length`, rev_timestamp AS `timestamp`, " |
217
|
|
|
. "rev_id, NULL AS `recreated` "; |
218
|
|
|
} |
219
|
|
|
|
220
|
|
|
$revDateConditions = $this->getDateConditions($start, $end, $offset); |
221
|
|
|
$arDateConditions = $this->getDateConditions($start, $end, $offset, '', 'ar_timestamp'); |
222
|
|
|
|
223
|
|
|
$tagTable = $project->getTableName('change_tag'); |
224
|
|
|
$tagDefTable = $project->getTableName('change_tag_def'); |
225
|
|
|
|
226
|
|
|
$revisionsSelect = " |
227
|
|
|
SELECT $revSelects ".$conditions['paSelects'].", |
228
|
|
|
NULL AS was_redirect |
229
|
|
|
FROM $pageTable |
230
|
|
|
JOIN $revisionTable ON page_id = rev_page ". |
231
|
|
|
$conditions['paJoin']." |
232
|
|
|
WHERE ".$conditions['whereRev']." |
233
|
|
|
AND rev_parent_id = '0'". |
234
|
|
|
$conditions['namespaceRev']. |
235
|
|
|
$conditions['redirects']. |
236
|
|
|
$revDateConditions. |
237
|
|
|
$conditions['revPageGroupBy']; |
238
|
|
|
|
239
|
|
|
// Only SELECT things that are needed, based on whether or not we're doing a COUNT. |
240
|
|
|
$arSelects = "ar_namespace AS `namespace`, 'arc' AS `type`, ar_title AS `page_title`, " |
241
|
|
|
. "'0' AS `redirect`, ar_len AS `rev_length`"; |
242
|
|
|
if (!$count) { |
243
|
|
|
$arSelects .= ", NULL AS `length`, MIN(ar_timestamp) AS `timestamp`, ". |
244
|
|
|
"ar_rev_id AS `rev_id`, EXISTS( |
245
|
|
|
SELECT 1 FROM $pageTable |
246
|
|
|
WHERE page_namespace = ar_namespace |
247
|
|
|
AND page_title = ar_title |
248
|
|
|
) AS `recreated`"; |
249
|
|
|
} |
250
|
|
|
|
251
|
|
|
$archiveSelect = " |
252
|
|
|
SELECT $arSelects ".$conditions['paSelectsArchive'].", |
253
|
|
|
( |
254
|
|
|
SELECT 1 |
255
|
|
|
FROM $tagTable |
256
|
|
|
WHERE ct_rev_id = ar_rev_id |
257
|
|
|
AND ct_tag_id = ( |
258
|
|
|
SELECT ctd_id |
259
|
|
|
FROM $tagDefTable |
260
|
|
|
WHERE ctd_name = 'mw-new-redirect' |
261
|
|
|
) |
262
|
|
|
LIMIT 1 |
263
|
|
|
) AS `was_redirect` |
264
|
|
|
FROM $archiveTable |
265
|
|
|
LEFT JOIN $logTable ON log_namespace = ar_namespace AND log_title = ar_title |
266
|
|
|
AND log_actor = ar_actor AND (log_action = 'move' OR log_action = 'move_redir') |
267
|
|
|
AND log_type = 'move' |
268
|
|
|
WHERE ".$conditions['whereArc']." |
269
|
|
|
AND ar_parent_id = '0' ". |
270
|
|
|
$conditions['namespaceArc']." |
271
|
|
|
AND log_action IS NULL |
272
|
|
|
$arDateConditions |
273
|
|
|
GROUP BY ar_namespace, ar_title"; |
274
|
|
|
|
275
|
|
|
if ('live' === $deleted) { |
276
|
|
|
return $revisionsSelect; |
277
|
|
|
} elseif ('deleted' === $deleted) { |
278
|
|
|
return $archiveSelect; |
279
|
|
|
} |
280
|
|
|
|
281
|
|
|
return "($revisionsSelect) UNION ($archiveSelect)"; |
282
|
|
|
} |
283
|
|
|
|
284
|
|
|
/** |
285
|
|
|
* Get the number of pages the user created by assessment. |
286
|
|
|
* @param Project $project |
287
|
|
|
* @param User $user |
288
|
|
|
* @param int|string $namespace |
289
|
|
|
* @param string $redirects One of the Pages::REDIR_ constants. |
290
|
|
|
* @param int|false $start Start date as Unix timestamp. |
291
|
|
|
* @param int|false $end End date as Unix timestamp. |
292
|
|
|
* @return array Keys are the assessment class, values are the counts. |
293
|
|
|
*/ |
294
|
|
|
public function getAssessmentCounts( |
295
|
|
|
Project $project, |
296
|
|
|
User $user, |
297
|
|
|
$namespace, |
298
|
|
|
string $redirects, |
299
|
|
|
$start = false, |
300
|
|
|
$end = false |
301
|
|
|
): array { |
302
|
|
|
$cacheKey = $this->getCacheKey(func_get_args(), 'user_pages_created_assessments'); |
303
|
|
|
if ($this->cache->hasItem($cacheKey)) { |
304
|
|
|
return $this->cache->getItem($cacheKey)->get(); |
305
|
|
|
} |
306
|
|
|
|
307
|
|
|
$pageTable = $project->getTableName('page'); |
308
|
|
|
$revisionTable = $project->getTableName('revision'); |
309
|
|
|
$pageAssessmentsTable = $project->getTableName('page_assessments'); |
310
|
|
|
|
311
|
|
|
$conditions = array_merge( |
312
|
|
|
$this->getNamespaceRedirectAndDeletedPagesConditions($namespace, $redirects), |
313
|
|
|
$this->getUserConditions('' !== $start.$end) |
314
|
|
|
); |
315
|
|
|
$revDateConditions = $this->getDateConditions($start, $end); |
316
|
|
|
|
317
|
|
|
$sql = "SELECT pa_class AS `class`, COUNT(pa_class) AS `count` FROM ( |
318
|
|
|
SELECT DISTINCT page_id, IFNULL(pa_class, '') AS pa_class |
319
|
|
|
FROM $pageTable |
320
|
|
|
JOIN $revisionTable ON page_id = rev_page |
321
|
|
|
LEFT JOIN $pageAssessmentsTable ON rev_page = pa_page_id |
322
|
|
|
WHERE ".$conditions['whereRev']." |
323
|
|
|
AND rev_parent_id = '0'". |
324
|
|
|
$conditions['namespaceRev']. |
325
|
|
|
$conditions['redirects']. |
326
|
|
|
$revDateConditions." |
327
|
|
|
GROUP BY page_id |
328
|
|
|
) a |
329
|
|
|
GROUP BY pa_class"; |
330
|
|
|
|
331
|
|
|
$resultQuery = $this->executeQuery($sql, $project, $user, $namespace); |
332
|
|
|
|
333
|
|
|
$assessments = []; |
334
|
|
|
while ($result = $resultQuery->fetchAssociative()) { |
335
|
|
|
$class = '' == $result['class'] ? '' : $result['class']; |
336
|
|
|
$assessments[$class] = $result['count']; |
337
|
|
|
} |
338
|
|
|
|
339
|
|
|
// Cache and return. |
340
|
|
|
return $this->setCache($cacheKey, $assessments); |
341
|
|
|
} |
342
|
|
|
|
343
|
|
|
/** |
344
|
|
|
* Fetch the closest 'delete' event as of the time of the given $offset. |
345
|
|
|
* |
346
|
|
|
* @param Project $project |
347
|
|
|
* @param int $namespace |
348
|
|
|
* @param string $pageTitle |
349
|
|
|
* @param string $offset |
350
|
|
|
* @return array |
351
|
|
|
*/ |
352
|
|
|
public function getDeletionSummary(Project $project, int $namespace, string $pageTitle, string $offset): array |
353
|
|
|
{ |
354
|
|
|
$actorTable = $project->getTableName('actor'); |
355
|
|
|
$commentTable = $project->getTableName('comment'); |
356
|
|
|
$loggingTable = $project->getTableName('logging', 'logindex'); |
357
|
|
|
$sql = "SELECT actor_name, comment_text, log_timestamp |
358
|
|
|
FROM $loggingTable |
359
|
|
|
JOIN $actorTable ON actor_id = log_actor |
360
|
|
|
JOIN $commentTable ON comment_id = log_comment_id |
361
|
|
|
WHERE log_namespace = $namespace |
362
|
|
|
AND log_title = :pageTitle |
363
|
|
|
AND log_timestamp >= $offset |
364
|
|
|
AND log_type = 'delete' |
365
|
|
|
AND log_action IN ('delete', 'delete_redir', 'delete_redir2') |
366
|
|
|
LIMIT 1"; |
367
|
|
|
return $this->executeProjectsQuery($project, $sql, [ |
368
|
|
|
'pageTitle' => str_replace(' ', '_', $pageTitle), |
369
|
|
|
])->fetchAssociative(); |
370
|
|
|
} |
371
|
|
|
} |
372
|
|
|
|