GlobalContribsRepository::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 15
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 4
nc 1
nop 9
dl 0
loc 15
rs 10
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Project;
8
use App\Model\User;
9
use Doctrine\Persistence\ManagerRegistry;
10
use GuzzleHttp\Client;
11
use PDO;
12
use Psr\Cache\CacheItemPoolInterface;
13
use Psr\Log\LoggerInterface;
14
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
15
use Wikimedia\IPUtils;
16
17
/**
18
 * A GlobalContribsRepository is responsible for retrieving information from the database for the GlobalContribs tool.
19
 * @codeCoverageIgnore
20
 */
21
class GlobalContribsRepository extends Repository
22
{
23
    protected ProjectRepository $projectRepo;
24
25
    /** @var Project CentralAuth project (meta.wikimedia for WMF installation). */
26
    protected Project $caProject;
27
28
    /**
     * Build the repository along with the CentralAuth Project it issues API requests against.
     * @param ManagerRegistry $managerRegistry Forwarded to the parent Repository.
     * @param CacheItemPoolInterface $cache Forwarded to the parent Repository.
     * @param Client $guzzle Forwarded to the parent Repository.
     * @param LoggerInterface $logger Forwarded to the parent Repository.
     * @param ParameterBagInterface $parameterBag Forwarded to the parent Repository.
     * @param bool $isWMF Forwarded to the parent Repository.
     * @param int $queryTimeout Forwarded to the parent Repository.
     * @param ProjectRepository $projectRepo Repository attached to the CentralAuth Project and to Projects
     *   created by this class.
     * @param string $centralAuthProject Value used to construct the CentralAuth Project.
     */
    public function __construct(
        ManagerRegistry $managerRegistry,
        CacheItemPoolInterface $cache,
        Client $guzzle,
        LoggerInterface $logger,
        ParameterBagInterface $parameterBag,
        bool $isWMF,
        int $queryTimeout,
        ProjectRepository $projectRepo,
        string $centralAuthProject
    ) {
        $this->projectRepo = $projectRepo;

        // Wire up the CentralAuth project before handing the remaining dependencies to the parent.
        $centralAuth = new Project($centralAuthProject);
        $centralAuth->setRepository($projectRepo);
        $this->caProject = $centralAuth;

        parent::__construct(
            $managerRegistry,
            $cache,
            $guzzle,
            $logger,
            $parameterBag,
            $isWMF,
            $queryTimeout
        );
    }
44
45
    /**
     * Get a user's edit count for each project.
     * @see GlobalContribsRepository::globalEditCountsFromCentralAuth()
     * @param User $user The user.
     * @return array[]|null Elements are arrays with 'dbName' (string), 'total' (int) and 'project' (Project).
     *   Null if anon (too slow).
     */
    public function globalEditCounts(User $user): ?array
    {
        if ($user->isAnon()) {
            return null;
        }

        // Get the edit counts from CentralAuth. Fall back to an empty list so the loop below
        // never iterates over null.
        $editCounts = $this->globalEditCountsFromCentralAuth($user) ?? [];

        // Pre-populate all projects' metadata, to prevent each project call from fetching it.
        // This goes through the typed ProjectRepository property (the instance the constructor attaches
        // to the CentralAuth project) because static analysis reports getAll() as missing on the
        // Repository type returned by Project::getRepository().
        $this->projectRepo->getAll();

        // Compile the output.
        $out = [];
        foreach ($editCounts as $editCount) {
            $project = new Project($editCount['dbName']);
            $project->setRepository($this->projectRepo);

            // Make sure the project exists (new projects may not yet be on db replicas).
            if (!$project->exists()) {
                continue;
            }

            $out[] = [
                'dbName' => $editCount['dbName'],
                'total' => $editCount['total'],
                'project' => $project,
            ];
        }

        return $out;
    }
80
81
    /**
     * Fetch the user's per-wiki edit counts from the globaluserinfo API of the CentralAuth project.
     * Requires the CentralAuth extension to be installed on the project.
     * @param User $user The user.
     * @return array|null Elements are arrays with 'dbName' (string), and 'total' (int). Null for logged out users.
     */
    protected function globalEditCountsFromCentralAuth(User $user): ?array
    {
        if ($user->isAnon() === true) {
            return null;
        }

        // Serve from cache when possible.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_globaleditcounts');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $response = $this->executeApiRequest($this->caProject, [
            'meta' => 'globaluserinfo',
            'guiprop' => 'editcount|merged',
            'guiuser' => $user->getUsername(),
        ]);

        // No merged accounts in the response: nothing to report (and nothing is cached).
        $mergedAccounts = $response['query']['globaluserinfo']['merged'] ?? null;
        if (null === $mergedAccounts) {
            return [];
        }

        $counts = [];
        foreach ($mergedAccounts as $account) {
            $counts[] = [
                'dbName' => $account['wiki'],
                'total' => $account['editcount'],
            ];
        }

        // Cache and return.
        return $this->setCache($cacheKey, $counts);
    }
119
120
    /**
     * Resolve each database name to a Project via the project repository.
     * @param array $dbNames Database names to look up.
     * @return Project[] Keyed by database name.
     */
    private function formatProjects(array $dbNames): array
    {
        $projectsByDbName = [];
        foreach ($dbNames as $name) {
            $projectsByDbName[$name] = $this->projectRepo->getProject($name);
        }
        return $projectsByDbName;
    }
135
136
    /**
     * Get all Projects on which the user has made at least one edit.
     * Anonymous users are resolved through the actor tables; everyone else through CentralAuth edit counts.
     * @param User $user
     * @return Project[]
     */
    public function getProjectsWithEdits(User $user): array
    {
        if ($user->isAnon()) {
            $actorIdsByDbName = $this->getDbNamesAndActorIds($user);
            return $this->formatProjects(array_keys($actorIdsByDbName));
        }

        // Keep only the wikis where CentralAuth reports a positive edit count.
        $editedDbNames = [];
        foreach ($this->globalEditCountsFromCentralAuth($user) as $wikiCount) {
            if ($wikiCount['total'] > 0) {
                $editedDbNames[] = $wikiCount['dbName'];
            }
        }

        return $this->formatProjects($editedDbNames);
    }
157
158
    /**
     * Get projects that the user has made at least one edit on, and the associated actor ID.
     * @param User $user
     * @param string[]|null $dbNames Loop over these projects instead of all of them. Names that are not
     *   present in the database list are skipped.
     * @return array Keys are database names, values are actor IDs.
     */
    public function getDbNamesAndActorIds(User $user, ?array $dbNames = null): array
    {
        // Check cache.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_db_names_actor_ids');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        if (!$dbNames) {
            // Use the typed ProjectRepository property; getAll() is not declared on the base Repository type.
            $dbNames = array_column($this->projectRepo->getAll(), 'dbName');
        }

        if ($user->isIpRange()) {
            $username = $user->getIpSubstringFromCidr().'%';
            $whereClause = "actor_name LIKE :actor";
        } else {
            $username = $user->getUsername();
            $whereClause = "actor_name = :actor";
        }

        // Loop-invariant: look the database list up once rather than once per project.
        $dbList = $this->getDbList();

        $queriesBySlice = [];

        foreach ($dbNames as $dbName) {
            // Only known databases have a slice to query, and only known names are safe to
            // interpolate into the SQL below.
            if (!isset($dbList[$dbName])) {
                continue;
            }

            $slice = $dbList[$dbName];
            // actor_revision table only includes users who have made at least one edit.
            $actorTable = $this->getTableName($dbName, 'actor', 'revision');
            $queriesBySlice[$slice][] = "SELECT '$dbName' AS `dbName`, actor_id " .
                "FROM $actorTable WHERE $whereClause";
        }

        $actorIds = [];

        // One UNIONed query per slice, so each slice is queried only once.
        foreach ($queriesBySlice as $slice => $queries) {
            $sql = implode(' UNION ', $queries);
            $resultQuery = $this->executeProjectsQuery($slice, $sql, [
                'actor' => $username,
            ]);

            while ($row = $resultQuery->fetchAssociative()) {
                $actorIds[$row['dbName']] = (int)$row['actor_id'];
            }
        }

        return $this->setCache($cacheKey, $actorIds);
    }
209
210
    /**
     * Get revisions by this user across the given Projects.
     * @param string[] $dbNames Database names of projects to iterate over.
     * @param User $user The user.
     * @param int|string $namespace Namespace ID or 'all' for all namespaces.
     * @param int|false $start Unix timestamp or false.
     * @param int|false $end Unix timestamp or false.
     * @param int $limit The maximum number of revisions to fetch from each project.
     * @param int|false $offset Unix timestamp. Used for pagination.
     * @return array[] At most $limit rows, newest first. Each row has the keys 'dbName', 'id', 'timestamp',
     *   'unix_timestamp', 'minor', 'deleted', 'length', 'length_change', 'parent_id', 'username',
     *   'page_title', 'namespace', 'comment' and 'reverted'.
     */
    public function getRevisions(
        array $dbNames,
        User $user,
        $namespace = 'all',
        $start = false,
        $end = false,
        int $limit = 31, // One extra to know whether there should be another page.
        $offset = false
    ): array {
        // Check cache.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_revisions');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        // Just need any Connection to use the ->quote() method.
        $quoteConn = $this->getProjectsConnection('s1');
        $isIpRange = $user->isIpRange();

        // For IP ranges the username varies per row, so select the actor_name column;
        // otherwise select the quoted username as a literal.
        $usernameExpr = $isIpRange
            ? 'actor_name'
            : $quoteConn->quote($user->getUsername(), PDO::PARAM_STR);

        // IP range handling.
        $startIp = '';
        $endIp = '';
        if ($isIpRange) {
            [$startIp, $endIp] = IPUtils::parseRange($user->getUsername());
            $startIp = $quoteConn->quote($startIp, PDO::PARAM_STR);
            $endIp = $quoteConn->quote($endIp, PDO::PARAM_STR);
        }

        // Fetch actor IDs (for IP ranges, it strips trailing zeros and uses a LIKE query).
        $actorIds = $this->getDbNamesAndActorIds($user, $dbNames);

        if (!$actorIds) {
            return [];
        }

        $namespaceCond = 'all' === $namespace
            ? ''
            : 'AND page_namespace = '.(int)$namespace;
        $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.', 'rev_timestamp');

        // Assemble queries.
        $queriesBySlice = [];
        $projectRepo = $this->caProject->getRepository();
        // Loop-invariant: look the database list up once rather than once per project.
        $dbList = $this->getDbList();
        foreach ($dbNames as $dbName) {
            // Projects without an actor ID have no edits by this user.
            if (!isset($actorIds[$dbName])) {
                continue;
            }

            $revisionTable = $projectRepo->getTableName($dbName, 'revision');
            $pageTable = $projectRepo->getTableName($dbName, 'page');
            $commentTable = $projectRepo->getTableName($dbName, 'comment', 'revision');
            $actorTable = $projectRepo->getTableName($dbName, 'actor', 'revision');
            $tagTable = $projectRepo->getTableName($dbName, 'change_tag');
            $tagDefTable = $projectRepo->getTableName($dbName, 'change_tag_def');

            if ($isIpRange) {
                $ipcTable = $projectRepo->getTableName($dbName, 'ip_changes');
                $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
                $whereClause = "ipc_hex BETWEEN $startIp AND $endIp";
            } else {
                $ipcJoin = '';
                $whereClause = 'revs.rev_actor = '.$actorIds[$dbName];
            }

            $slice = $dbList[$dbName];
            $queriesBySlice[$slice][] = "
                    SELECT
                        '$dbName' AS dbName,
                        revs.rev_id AS id,
                        revs.rev_timestamp AS `timestamp`,
                        UNIX_TIMESTAMP(revs.rev_timestamp) AS `unix_timestamp`,
                        revs.rev_minor_edit AS `minor`,
                        revs.rev_deleted AS `deleted`,
                        revs.rev_len AS `length`,
                        (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS `length_change`,
                        revs.rev_parent_id AS `parent_id`,
                        $usernameExpr AS `username`,
                        page.page_title,
                        page.page_namespace AS `namespace`,
                        comment_text AS `comment`,
                        (
                            SELECT 1
                            FROM $tagTable
                            WHERE ct_rev_id = revs.rev_id
                            AND ct_tag_id = (
                                SELECT ctd_id
                                FROM $tagDefTable
                                WHERE ctd_name = 'mw-reverted'
                            )
                            LIMIT 1
                        ) AS reverted
                    FROM $revisionTable AS revs
                        $ipcJoin
                        JOIN $pageTable AS page ON (rev_page = page_id)
                        JOIN $actorTable ON (actor_id = revs.rev_actor)
                        LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
                        LEFT OUTER JOIN $commentTable ON revs.rev_comment_id = comment_id
                    WHERE $whereClause
                        $namespaceCond
                        $revDateConditions";
        }

        // Re-assemble into UNIONed queries, executing as many per slice as possible.
        $revisions = [];
        foreach ($queriesBySlice as $slice => $queries) {
            $sql = "SELECT * FROM ((\n" . join("\n) UNION (\n", $queries) .
                ")) a ORDER BY timestamp DESC LIMIT $limit";
            $revisions = array_merge($revisions, $this->executeProjectsQuery($slice, $sql)->fetchAllAssociative());
        }

        // Each slice's rows arrive sorted, but the merged list is only ordered within a slice.
        // Re-sort whenever more than one slice contributed, even if the total is within $limit.
        if (count($queriesBySlice) > 1) {
            usort($revisions, static function (array $a, array $b): int {
                // Newest first.
                return $b['unix_timestamp'] <=> $a['unix_timestamp'];
            });
        }

        // Truncate size to $limit.
        if (count($revisions) > $limit) {
            $revisions = array_slice($revisions, 0, $limit);
        }

        // Cache and return.
        return $this->setCache($cacheKey, $revisions);
    }
345
}
346