Passed
Pull Request — main (#442)
by MusikAnimal
08:40 queued 04:21
created

GlobalContribsRepository::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 15
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nc 1
nop 7
dl 0
loc 15
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Project;
8
use App\Model\User;
9
use GuzzleHttp\Client;
10
use PDO;
11
use Psr\Cache\CacheItemPoolInterface;
12
use Psr\Container\ContainerInterface;
13
use Psr\Log\LoggerInterface;
14
use Wikimedia\IPUtils;
15
16
/**
17
 * A GlobalContribsRepository is responsible for retrieving information from the database for the GlobalContribs tool.
18
 * @codeCoverageIgnore
19
 */
20
class GlobalContribsRepository extends Repository
21
{
22
    protected ProjectRepository $projectRepo;
23
24
    /** @var Project CentralAuth project (meta.wikimedia for WMF installation). */
25
    protected Project $caProject;
26
27
    /**
     * Set up the shared Repository dependencies and the CentralAuth Project.
     *
     * @param ContainerInterface $container Must also provide getParameter(), which the PSR-11 interface
     *   does not declare (presumably Symfony's container at runtime — confirm against the service wiring).
     * @param CacheItemPoolInterface $cache
     * @param Client $guzzle
     * @param LoggerInterface $logger
     * @param bool $isWMF
     * @param int $queryTimeout
     * @param ProjectRepository $projectRepo Repository attached to every Project this class creates.
     */
    public function __construct(
        ContainerInterface $container,
        CacheItemPoolInterface $cache,
        Client $guzzle,
        LoggerInterface $logger,
        bool $isWMF,
        int $queryTimeout,
        ProjectRepository $projectRepo
    ) {
        parent::__construct($container, $cache, $guzzle, $logger, $isWMF, $queryTimeout);

        $this->projectRepo = $projectRepo;

        // getParameter() is not part of the PSR-11 ContainerInterface, hence the static-analysis annotation.
        $centralAuthProject = $this->container
            ->/** @scrutinizer ignore-call */ getParameter('central_auth_project');

        $this->caProject = new Project($centralAuthProject);
        $this->caProject->setRepository($this->projectRepo);
    }
43
44
    /**
     * Get a user's edit count for each project.
     *
     * The counts are read from CentralAuth, so logged out users are not supported.
     *
     * @see GlobalContribsRepository::globalEditCountsFromCentralAuth()
     * @param User $user The user.
     * @return array[]|null One element per project, each an array with the keys 'dbName' (string),
     *   'total' (int) and 'project' (Project). Null if anon (too slow).
     */
    public function globalEditCounts(User $user): ?array
    {
        if ($user->isAnon()) {
            return null;
        }

        // Get the edit counts from CentralAuth. The helper is nullable, so fall back to an empty list
        // rather than handing null to foreach.
        $editCounts = $this->globalEditCountsFromCentralAuth($user) ?? [];

        // Pre-populate all projects' metadata, to prevent each project call from fetching it.
        // The typed ProjectRepository is used directly; it is the same repository the constructor
        // attaches to the CentralAuth project, and getAll() is only declared on ProjectRepository.
        $this->projectRepo->getAll();

        // Compile the output.
        $out = [];
        foreach ($editCounts as $editCount) {
            $project = new Project($editCount['dbName']);
            $project->setRepository($this->projectRepo);
            $out[] = [
                'dbName' => $editCount['dbName'],
                'total' => $editCount['total'],
                'project' => $project,
            ];
        }
        return $out;
    }
76
77
    /**
     * Get a user's total edit count on one or more projects, via the 'globaluserinfo' API module.
     * Requires the CentralAuth extension to be installed on the project.
     * @param User $user The user.
     * @return array|null Elements are arrays with 'dbName' (string), and 'total' (int). Null for logged out users.
     *   An empty array (which is not cached) when the API response has no 'merged' accounts.
     */
    protected function globalEditCountsFromCentralAuth(User $user): ?array
    {
        if (true === $user->isAnon()) {
            return null;
        }

        // Serve from cache when available.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_globaleditcounts');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $response = $this->executeApiRequest($this->caProject, [
            'meta' => 'globaluserinfo',
            'guiprop' => 'editcount|merged',
            'guiuser' => $user->getUsername(),
        ]);

        $mergedAccounts = $response['query']['globaluserinfo']['merged'] ?? null;
        if (null === $mergedAccounts) {
            return [];
        }

        // Reduce each merged account to the two fields callers need.
        $editCounts = [];
        foreach ($mergedAccounts as $account) {
            $editCounts[] = [
                'dbName' => $account['wiki'],
                'total' => $account['editcount'],
            ];
        }

        // Cache and return.
        return $this->setCache($cacheKey, $editCounts);
    }
115
116
    /**
     * Build a Project object for every given database name.
     * @param array $dbNames Database names to look up through the ProjectRepository.
     * @return Project[] Keyed by database name.
     */
    private function formatProjects(array $dbNames): array
    {
        $projectsByDbName = [];

        foreach ($dbNames as $name) {
            $projectsByDbName[$name] = $this->projectRepo->getProject($name);
        }

        return $projectsByDbName;
    }
131
132
    /**
     * Get all Projects on which the user has made at least one edit.
     *
     * Logged out users are resolved through the actor tables; accounts through the CentralAuth edit counts.
     *
     * @param User $user
     * @return Project[] Keyed by database name.
     */
    public function getProjectsWithEdits(User $user): array
    {
        if ($user->isAnon()) {
            return $this->formatProjects(
                array_keys($this->getDbNamesAndActorIds($user))
            );
        }

        // Keep only the projects with a positive edit count.
        $editedProjects = array_filter(
            $this->globalEditCountsFromCentralAuth($user),
            static fn (array $projectMeta): bool => $projectMeta['total'] > 0
        );

        return $this->formatProjects(array_column($editedProjects, 'dbName'));
    }
153
154
    /**
     * Get projects that the user has made at least one edit on, and the associated actor ID.
     *
     * For IP ranges the actor name is matched with a LIKE prefix, so more than one actor can match on a
     * project; the result keeps a single actor ID per database name.
     *
     * @param User $user
     * @param string[]|null $dbNames Loop over these projects instead of all of them.
     *   Null or an empty array means all projects.
     * @return int[] Keys are database names, values are actor IDs.
     */
    public function getDbNamesAndActorIds(User $user, ?array $dbNames = null): array
    {
        // Check cache.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_db_names_actor_ids');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        if (!$dbNames) {
            // The typed ProjectRepository is the one attached to the CentralAuth project in the
            // constructor; getAll() is only declared on ProjectRepository.
            $dbNames = array_column($this->projectRepo->getAll(), 'dbName');
        }

        if ($user->isIpRange()) {
            $username = $user->getIpSubstringFromCidr().'%';
            $whereClause = "actor_name LIKE :actor";
        } else {
            $username = $user->getUsername();
            $whereClause = "actor_name = :actor";
        }

        // Database name => slice. Looked up once here instead of once per database in the loop below.
        $dbList = $this->getDbList();

        $queriesBySlice = [];

        foreach ($dbNames as $dbName) {
            $slice = $dbList[$dbName];
            // actor_revision table only includes users who have made at least one edit.
            $actorTable = $this->getTableName($dbName, 'actor', 'revision');
            // NOTE(review): $dbName is interpolated into the SQL text; callers are presumably passing
            // trusted database names — confirm.
            $queriesBySlice[$slice][] = "SELECT '$dbName' AS `dbName`, actor_id " .
                "FROM $actorTable WHERE $whereClause";
        }

        $actorIds = [];

        // One UNIONed query per slice, all sharing the same bound :actor value.
        foreach ($queriesBySlice as $slice => $queries) {
            $sql = implode(' UNION ', $queries);
            $resultQuery = $this->executeProjectsQuery($slice, $sql, [
                'actor' => $username,
            ]);

            while ($row = $resultQuery->fetchAssociative()) {
                $actorIds[$row['dbName']] = (int)$row['actor_id'];
            }
        }

        return $this->setCache($cacheKey, $actorIds);
    }
205
206
    /**
     * Get revisions by this user across the given Projects.
     *
     * One UNIONed query is run per database slice, each returning at most $limit rows, newest first.
     * The per-slice results are then merged, sorted newest first and truncated to $limit.
     *
     * @param string[] $dbNames Database names of projects to iterate over.
     * @param User $user The user.
     * @param int|string $namespace Namespace ID or 'all' for all namespaces.
     * @param int|false $start Unix timestamp or false.
     * @param int|false $end Unix timestamp or false.
     * @param int $limit The maximum number of revisions to fetch from each project.
     * @param int|false $offset Unix timestamp. Used for pagination.
     * @return array[] Rows with the keys 'dbName', 'id', 'timestamp', 'unix_timestamp', 'minor', 'deleted',
     *   'length', 'length_change', 'parent_id', 'username', 'page_title', 'page_namespace', 'comment'
     *   and 'reverted'. Empty (and not cached) if the user has no actor ID on any of the projects.
     */
    public function getRevisions(
        array $dbNames,
        User $user,
        $namespace = 'all',
        $start = false,
        $end = false,
        int $limit = 31, // One extra to know whether there should be another page.
        $offset = false
    ): array {
        // Check cache.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_revisions');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        // Just need any Connection to use the ->quote() method.
        $quoteConn = $this->getProjectsConnection('s1');
        $username = $quoteConn->quote($user->getUsername(), PDO::PARAM_STR);

        // IP range handling.
        $isIpRange = $user->isIpRange();
        $startIp = '';
        $endIp = '';
        if ($isIpRange) {
            [$startIp, $endIp] = IPUtils::parseRange($user->getUsername());
            $startIp = $quoteConn->quote($startIp, PDO::PARAM_STR);
            $endIp = $quoteConn->quote($endIp, PDO::PARAM_STR);

            // A range covers many actors, so select the actor_name column instead of a quoted literal.
            $username = 'actor_name';
        }

        // Fetch actor IDs (for IP ranges, it strips trailing zeros and uses a LIKE query).
        $actorIds = $this->getDbNamesAndActorIds($user, $dbNames);

        if (!$actorIds) {
            return [];
        }

        $namespaceCond = 'all' === $namespace
            ? ''
            : 'AND page_namespace = '.(int)$namespace;
        $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.', 'rev_timestamp');

        // Database name => slice. Looked up once here instead of once per database in the loop below.
        $dbList = $this->getDbList();

        // Assemble queries.
        $queriesBySlice = [];
        $projectRepo = $this->caProject->getRepository();
        foreach ($dbNames as $dbName) {
            // Skip projects on which the user has no actor row, i.e. no edits.
            if (!isset($actorIds[$dbName])) {
                continue;
            }

            $revisionTable = $projectRepo->getTableName($dbName, 'revision');
            $pageTable = $projectRepo->getTableName($dbName, 'page');
            $commentTable = $projectRepo->getTableName($dbName, 'comment', 'revision');
            $actorTable = $projectRepo->getTableName($dbName, 'actor', 'revision');
            $tagTable = $projectRepo->getTableName($dbName, 'change_tag');
            $tagDefTable = $projectRepo->getTableName($dbName, 'change_tag_def');

            if ($isIpRange) {
                $ipcTable = $projectRepo->getTableName($dbName, 'ip_changes');
                $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
                $whereClause = "ipc_hex BETWEEN $startIp AND $endIp";
            } else {
                $ipcJoin = '';
                $whereClause = 'revs.rev_actor = '.$actorIds[$dbName];
            }

            $slice = $dbList[$dbName];
            $queriesBySlice[$slice][] = "
                    SELECT
                        '$dbName' AS dbName,
                        revs.rev_id AS id,
                        revs.rev_timestamp AS timestamp,
                        UNIX_TIMESTAMP(revs.rev_timestamp) AS unix_timestamp,
                        revs.rev_minor_edit AS minor,
                        revs.rev_deleted AS deleted,
                        revs.rev_len AS length,
                        (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS length_change,
                        revs.rev_parent_id AS parent_id,
                        $username AS username,
                        page.page_title,
                        page.page_namespace,
                        comment_text AS comment,
                        (
                            SELECT 1
                            FROM $tagTable
                            WHERE ct_rev_id = revs.rev_id
                            AND ct_tag_id = (
                                SELECT ctd_id
                                FROM $tagDefTable
                                WHERE ctd_name = 'mw-reverted'
                            )
                            LIMIT 1
                        ) AS reverted
                    FROM $revisionTable AS revs
                        $ipcJoin
                        JOIN $pageTable AS page ON (rev_page = page_id)
                        JOIN $actorTable ON (actor_id = revs.rev_actor)
                        LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
                        LEFT OUTER JOIN $commentTable ON revs.rev_comment_id = comment_id
                    WHERE $whereClause
                        $namespaceCond
                        $revDateConditions";
        }

        // Re-assemble into UNIONed queries, executing as many per slice as possible.
        $revisions = [];
        foreach ($queriesBySlice as $slice => $queries) {
            $sql = "SELECT * FROM ((\n" . join("\n) UNION (\n", $queries) . ")) a ORDER BY timestamp DESC LIMIT $limit";
            $revisions = array_merge($revisions, $this->executeProjectsQuery($slice, $sql)->fetchAllAssociative());
        }

        // Each slice's rows are already newest-first, but rows from different slices are only
        // concatenated. Re-sort whenever more than one slice contributed, even if the total is within
        // $limit; otherwise the result would be grouped by slice instead of ordered by time.
        if (count($queriesBySlice) > 1) {
            usort(
                $revisions,
                static fn (array $a, array $b): int => $b['unix_timestamp'] <=> $a['unix_timestamp']
            );
        }

        // Truncate size to $limit.
        if (count($revisions) > $limit) {
            $revisions = array_slice($revisions, 0, $limit);
        }

        // Cache and return.
        return $this->setCache($cacheKey, $revisions);
    }
341
}
342