Passed
Push — deletion-summary ( 54f099 )
by MusikAnimal
06:52 queued 33s
created

PagesRepository::getDeletionSummary()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 18
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 12
nc 1
nop 4
dl 0
loc 18
rs 9.8666
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Pages;
8
use App\Model\Project;
9
use App\Model\User;
10
11
/**
12
 * An PagesRepository is responsible for retrieving information from the
13
 * databases for the Pages Created tool. It does not do any post-processing
14
 * of that data.
15
 * @codeCoverageIgnore
16
 */
17
class PagesRepository extends UserRepository
18
{
19
    /**
20
     * Count the number of pages created by a user.
21
     * @param Project $project
22
     * @param User $user
23
     * @param string|int $namespace Namespace ID or 'all'.
24
     * @param string $redirects One of the Pages::REDIR_ constants.
25
     * @param string $deleted One of the Pages::DEL_ constants.
26
     * @param int|false $start Start date as Unix timestamp.
27
     * @param int|false $end End date as Unix timestamp.
28
     * @return string[] Result of query, see below. Includes live and deleted pages.
29
     */
30
    public function countPagesCreated(
31
        Project $project,
32
        User $user,
33
        $namespace,
34
        string $redirects,
35
        string $deleted,
36
        $start = false,
37
        $end = false
38
    ): array {
39
        $cacheKey = $this->getCacheKey(func_get_args(), 'num_user_pages_created');
40
        if ($this->cache->hasItem($cacheKey)) {
41
            return $this->cache->getItem($cacheKey)->get();
42
        }
43
44
        $conditions = [
45
            'paSelects' => '',
46
            'paSelectsArchive' => '',
47
            'paJoin' => '',
48
            'revPageGroupBy' => '',
49
        ];
50
        $conditions = array_merge(
51
            $conditions,
52
            $this->getNamespaceRedirectAndDeletedPagesConditions($namespace, $redirects),
53
            $this->getUserConditions('' !== $start.$end)
54
        );
55
56
        $wasRedirect = $this->getWasRedirectClause($redirects, $deleted);
57
        $summation = Pages::DEL_NONE !== $deleted ? 'redirect OR was_redirect' : 'redirect';
58
59
        $sql = "SELECT `namespace`,
60
                    COUNT(page_title) AS `count`,
61
                    SUM(IF(type = 'arc', 1, 0)) AS `deleted`,
62
                    SUM($summation) AS `redirects`,
63
                    SUM(rev_length) AS `total_length`
64
                FROM (" .
65
            $this->getPagesCreatedInnerSql($project, $conditions, $deleted, $start, $end, false, true)."
66
                ) a ".
67
                $wasRedirect .
68
                "GROUP BY `namespace`";
69
70
        $result = $this->executeQuery($sql, $project, $user, $namespace)
71
            ->fetchAllAssociative();
72
73
        // Cache and return.
74
        return $this->setCache($cacheKey, $result);
75
    }
76
77
    /**
78
     * Get pages created by a user.
79
     * @param Project $project
80
     * @param User $user
81
     * @param string|int $namespace Namespace ID or 'all'.
82
     * @param string $redirects One of the Pages::REDIR_ constants.
83
     * @param string $deleted One of the Pages::DEL_ constants.
84
     * @param int|false $start Start date as Unix timestamp.
85
     * @param int|false $end End date as Unix timestamp.
86
     * @param int|null $limit Number of results to return, or blank to return all.
87
     * @param false|int $offset Unix timestamp. Used for pagination.
88
     * @return string[] Result of query, see below. Includes live and deleted pages.
89
     */
90
    public function getPagesCreated(
91
        Project $project,
92
        User $user,
93
        $namespace,
94
        string $redirects,
95
        string $deleted,
96
        $start = false,
97
        $end = false,
98
        ?int $limit = 1000,
99
        $offset = false
100
    ): array {
101
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_pages_created');
102
        if ($this->cache->hasItem($cacheKey)) {
103
            return $this->cache->getItem($cacheKey)->get();
104
        }
105
106
        $conditions = [
107
            'paSelects' => '',
108
            'paSelectsArchive' => '',
109
            'paJoin' => '',
110
            'revPageGroupBy' => '',
111
        ];
112
113
        $conditions = array_merge(
114
            $conditions,
115
            $this->getNamespaceRedirectAndDeletedPagesConditions($namespace, $redirects),
116
            $this->getUserConditions('' !== $start.$end)
117
        );
118
119
        $hasPageAssessments = $this->isWMF && $project->hasPageAssessments();
120
        if ($hasPageAssessments) {
121
            $pageAssessmentsTable = $project->getTableName('page_assessments');
122
            $conditions['paSelects'] = ', pa_class';
123
            $conditions['paSelectsArchive'] = ', NULL AS pa_class';
124
            $conditions['paJoin'] = "LEFT JOIN $pageAssessmentsTable ON rev_page = pa_page_id";
125
            $conditions['revPageGroupBy'] = 'GROUP BY rev_page';
126
        }
127
128
        $wasRedirect = $this->getWasRedirectClause($redirects, $deleted);
129
130
        $sql = "SELECT * FROM (".
131
            $this->getPagesCreatedInnerSql($project, $conditions, $deleted, $start, $end, $offset)."
132
                ) a ".
133
                $wasRedirect .
134
                "ORDER BY `timestamp` DESC
135
                ".(!empty($limit) ? "LIMIT $limit" : '');
136
137
        $result = $this->executeQuery($sql, $project, $user, $namespace)
138
            ->fetchAllAssociative();
139
140
        // Cache and return.
141
        return $this->setCache($cacheKey, $result);
142
    }
143
144
    private function getWasRedirectClause(string $redirects, string $deleted): string
145
    {
146
        if (Pages::REDIR_NONE === $redirects) {
147
            return "WHERE was_redirect IS NULL ";
148
        } elseif (Pages::REDIR_ONLY === $redirects && Pages::DEL_ONLY === $deleted) {
149
            return "WHERE was_redirect = 1 ";
150
        } elseif (Pages::REDIR_ONLY === $redirects && Pages::DEL_ALL === $deleted) {
151
            return "WHERE was_redirect = 1 OR redirect = 1 ";
152
        }
153
        return '';
154
    }
155
156
    /**
157
     * Get SQL fragments for the namespace and redirects,
158
     * to be used in self::getPagesCreatedInnerSql().
159
     * @param string|int $namespace Namespace ID or 'all'.
160
     * @param string $redirects One of the Pages::REDIR_ constants.
161
     * @return string[] With keys 'namespaceRev', 'namespaceArc' and 'redirects'
162
     */
163
    private function getNamespaceRedirectAndDeletedPagesConditions($namespace, string $redirects): array
164
    {
165
        $conditions = [
166
            'namespaceArc' => '',
167
            'namespaceRev' => '',
168
            'redirects' => '',
169
        ];
170
171
        if ('all' !== $namespace) {
172
            $conditions['namespaceRev'] = " AND page_namespace = '".intval($namespace)."' ";
173
            $conditions['namespaceArc'] = " AND ar_namespace = '".intval($namespace)."' ";
174
        }
175
176
        if (Pages::REDIR_ONLY == $redirects) {
177
            $conditions['redirects'] = " AND page_is_redirect = '1' ";
178
        } elseif (Pages::REDIR_NONE == $redirects) {
179
            $conditions['redirects'] = " AND page_is_redirect = '0' ";
180
        }
181
182
        return $conditions;
183
    }
184
185
    /**
186
     * Inner SQL for getting or counting pages created by the user.
187
     * @param Project $project
188
     * @param string[] $conditions Conditions for the SQL, must include 'paSelects',
189
     *     'paSelectsArchive', 'paJoin', 'whereRev', 'whereArc', 'namespaceRev', 'namespaceArc',
190
     *     'redirects' and 'revPageGroupBy'.
191
     * @param string $deleted One of the Pages::DEL_ constants.
192
     * @param int|false $start Start date as Unix timestamp.
193
     * @param int|false $end End date as Unix timestamp.
194
     * @param int|false $offset Unix timestamp, used for pagination.
195
     * @param bool $count Omit unneeded columns from the SELECT clause.
196
     * @return string Raw SQL.
197
     */
198
    private function getPagesCreatedInnerSql(
199
        Project $project,
200
        array $conditions,
201
        string $deleted,
202
        $start,
203
        $end,
204
        $offset = false,
205
        bool $count = false
206
    ): string {
207
        $pageTable = $project->getTableName('page');
208
        $revisionTable = $project->getTableName('revision');
209
        $archiveTable = $project->getTableName('archive');
210
        $logTable = $project->getTableName('logging', 'logindex');
211
212
        // Only SELECT things that are needed, based on whether or not we're doing a COUNT.
213
        $revSelects = "DISTINCT page_namespace AS `namespace`, 'rev' AS `type`, page_title, "
214
            . "page_is_redirect AS `redirect`, rev_len AS `rev_length`";
215
        if (!$count) {
216
            $revSelects .= ", page_len AS `length`, rev_timestamp AS `timestamp`, "
217
                . "rev_id, NULL AS `recreated` ";
218
        }
219
220
        $revDateConditions = $this->getDateConditions($start, $end, $offset);
221
        $arDateConditions = $this->getDateConditions($start, $end, $offset, '', 'ar_timestamp');
222
223
        $tagTable = $project->getTableName('change_tag');
224
        $tagDefTable = $project->getTableName('change_tag_def');
225
226
        $revisionsSelect = "
227
            SELECT $revSelects ".$conditions['paSelects'].",
228
                NULL AS was_redirect
229
            FROM $pageTable
230
            JOIN $revisionTable ON page_id = rev_page ".
231
            $conditions['paJoin']."
232
            WHERE ".$conditions['whereRev']."
233
                AND rev_parent_id = '0'".
234
                $conditions['namespaceRev'].
235
                $conditions['redirects'].
236
                $revDateConditions.
237
            $conditions['revPageGroupBy'];
238
239
        // Only SELECT things that are needed, based on whether or not we're doing a COUNT.
240
        $arSelects = "ar_namespace AS `namespace`, 'arc' AS `type`, ar_title AS `page_title`, "
241
            . "'0' AS `redirect`, ar_len AS `rev_length`";
242
        if (!$count) {
243
            $arSelects .= ", NULL AS `length`, MIN(ar_timestamp) AS `timestamp`, ".
244
                "ar_rev_id AS `rev_id`, EXISTS(
245
                    SELECT 1 FROM $pageTable
246
                    WHERE page_namespace = ar_namespace
247
                    AND page_title = ar_title
248
                ) AS `recreated`";
249
        }
250
251
        $archiveSelect = "
252
            SELECT $arSelects ".$conditions['paSelectsArchive'].",
253
                (
254
                    SELECT 1
255
                    FROM $tagTable
256
                    WHERE ct_rev_id = ar_rev_id
257
                    AND ct_tag_id = (
258
                        SELECT ctd_id
259
                        FROM $tagDefTable
260
                        WHERE ctd_name = 'mw-new-redirect'
261
                    )
262
                    LIMIT 1
263
                ) AS `was_redirect`
264
            FROM $archiveTable
265
            LEFT JOIN $logTable ON log_namespace = ar_namespace AND log_title = ar_title
266
                AND log_actor = ar_actor AND (log_action = 'move' OR log_action = 'move_redir')
267
                AND log_type = 'move'
268
            WHERE ".$conditions['whereArc']."
269
                AND ar_parent_id = '0' ".
270
                $conditions['namespaceArc']."
271
                AND log_action IS NULL
272
                $arDateConditions
273
            GROUP BY ar_namespace, ar_title";
274
275
        if ('live' === $deleted) {
276
            return $revisionsSelect;
277
        } elseif ('deleted' === $deleted) {
278
            return $archiveSelect;
279
        }
280
281
        return "($revisionsSelect) UNION ($archiveSelect)";
282
    }
283
284
    /**
285
     * Get the number of pages the user created by assessment.
286
     * @param Project $project
287
     * @param User $user
288
     * @param int|string $namespace
289
     * @param string $redirects One of the Pages::REDIR_ constants.
290
     * @param int|false $start Start date as Unix timestamp.
291
     * @param int|false $end End date as Unix timestamp.
292
     * @return array Keys are the assessment class, values are the counts.
293
     */
294
    public function getAssessmentCounts(
295
        Project $project,
296
        User $user,
297
        $namespace,
298
        string $redirects,
299
        $start = false,
300
        $end = false
301
    ): array {
302
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_pages_created_assessments');
303
        if ($this->cache->hasItem($cacheKey)) {
304
            return $this->cache->getItem($cacheKey)->get();
305
        }
306
307
        $pageTable = $project->getTableName('page');
308
        $revisionTable = $project->getTableName('revision');
309
        $pageAssessmentsTable = $project->getTableName('page_assessments');
310
311
        $conditions = array_merge(
312
            $this->getNamespaceRedirectAndDeletedPagesConditions($namespace, $redirects),
313
            $this->getUserConditions('' !== $start.$end)
314
        );
315
        $revDateConditions = $this->getDateConditions($start, $end);
316
317
        $sql = "SELECT pa_class AS `class`, COUNT(pa_class) AS `count` FROM (
318
                    SELECT DISTINCT page_id, IFNULL(pa_class, '') AS pa_class
319
                    FROM $pageTable
320
                    JOIN $revisionTable ON page_id = rev_page
321
                    LEFT JOIN $pageAssessmentsTable ON rev_page = pa_page_id
322
                    WHERE ".$conditions['whereRev']."
323
                    AND rev_parent_id = '0'".
324
                    $conditions['namespaceRev'].
325
                    $conditions['redirects'].
326
                    $revDateConditions."
327
                    GROUP BY page_id
328
                ) a
329
                GROUP BY pa_class";
330
331
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace);
332
333
        $assessments = [];
334
        while ($result = $resultQuery->fetchAssociative()) {
335
            $class = '' == $result['class'] ? '' : $result['class'];
336
            $assessments[$class] = $result['count'];
337
        }
338
339
        // Cache and return.
340
        return $this->setCache($cacheKey, $assessments);
341
    }
342
343
    /**
344
     * Fetch the closest 'delete' event as of the time of the given $offset.
345
     *
346
     * @param Project $project
347
     * @param int $namespace
348
     * @param string $pageTitle
349
     * @param string $offset
350
     * @return array
351
     */
352
    public function getDeletionSummary(Project $project, int $namespace, string $pageTitle, string $offset): array
353
    {
354
        $actorTable = $project->getTableName('actor');
355
        $commentTable = $project->getTableName('comment');
356
        $loggingTable = $project->getTableName('logging', 'logindex');
357
        $sql = "SELECT actor_name, comment_text, log_timestamp
358
                FROM $loggingTable
359
                JOIN $actorTable ON actor_id = log_actor
360
                JOIN $commentTable ON comment_id = log_comment_id
361
                WHERE log_namespace = $namespace
362
                AND log_title = :pageTitle
363
                AND log_timestamp >= $offset
364
                AND log_type = 'delete'
365
                AND log_action IN ('delete', 'delete_redir', 'delete_redir2')
366
                LIMIT 1";
367
        return $this->executeProjectsQuery($project, $sql, [
368
            'pageTitle' => str_replace(' ', '_', $pageTitle),
369
        ])->fetchAssociative();
370
    }
371
}
372