GlobalContribsRepository::getRevisions()   C
last analyzed

Complexity

Conditions 12
Paths 67

Size

Total Lines 123
Code Lines 62

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 12
eloc 62
nc 67
nop 7
dl 0
loc 123
rs 6.4024
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Project;
8
use App\Model\User;
9
use Doctrine\Persistence\ManagerRegistry;
10
use GuzzleHttp\Client;
11
use PDO;
12
use Psr\Cache\CacheItemPoolInterface;
13
use Psr\Log\LoggerInterface;
14
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
15
use Wikimedia\IPUtils;
16
17
/**
18
 * A GlobalContribsRepository is responsible for retrieving information from the database for the GlobalContribs tool.
19
 * @codeCoverageIgnore
20
 */
21
class GlobalContribsRepository extends Repository
22
{
23
    protected ProjectRepository $projectRepo;
24
25
    /** @var Project CentralAuth project (meta.wikimedia for WMF installation). */
26
    protected Project $caProject;
27
28
    /**
     * Set up the repository and the CentralAuth Project it queries.
     * @param ManagerRegistry $managerRegistry Forwarded to the parent Repository.
     * @param CacheItemPoolInterface $cache Forwarded to the parent Repository.
     * @param Client $guzzle Forwarded to the parent Repository.
     * @param LoggerInterface $logger Forwarded to the parent Repository.
     * @param ParameterBagInterface $parameterBag Forwarded to the parent Repository.
     * @param bool $isWMF Forwarded to the parent Repository.
     * @param int $queryTimeout Forwarded to the parent Repository.
     * @param ProjectRepository $projectRepo Repository attached to every Project this class creates.
     * @param string $centralAuthProject Identifier given to the Project constructor for the CentralAuth project.
     */
    public function __construct(
        ManagerRegistry $managerRegistry,
        CacheItemPoolInterface $cache,
        Client $guzzle,
        LoggerInterface $logger,
        ParameterBagInterface $parameterBag,
        bool $isWMF,
        int $queryTimeout,
        ProjectRepository $projectRepo,
        string $centralAuthProject
    ) {
        // Build the CentralAuth project fully before exposing it on the instance.
        $centralAuth = new Project($centralAuthProject);
        $this->projectRepo = $projectRepo;
        $centralAuth->setRepository($projectRepo);
        $this->caProject = $centralAuth;

        parent::__construct($managerRegistry, $cache, $guzzle, $logger, $parameterBag, $isWMF, $queryTimeout);
    }
44
45
    /**
     * Get a user's edit count for each project.
     * @see GlobalContribsRepository::globalEditCountsFromCentralAuth()
     * @param User $user The user.
     * @return array[]|null Elements are arrays with 'dbName' (string), 'total' (int) and 'project' (Project).
     *   Null if anon (too slow).
     */
    public function globalEditCounts(User $user): ?array
    {
        if ($user->isAnon()) {
            return null;
        }

        // Get the edit counts from CentralAuth. The method is nullable, so fall back to an
        // empty list rather than iterating over null.
        $editCounts = $this->globalEditCountsFromCentralAuth($user) ?? [];

        // Pre-populate all projects' metadata, to prevent each project call from fetching it.
        // Called on $this->projectRepo (typed as ProjectRepository, which declares getAll())
        // instead of $this->caProject->getRepository(), whose declared type is the base Repository.
        // The constructor attaches this very instance to $this->caProject.
        $this->projectRepo->getAll();

        // Compile the output.
        $out = [];
        foreach ($editCounts as $editCount) {
            $project = new Project($editCount['dbName']);
            $project->setRepository($this->projectRepo);

            // Make sure the project exists (new projects may not yet be on db replicas).
            if (!$project->exists()) {
                continue;
            }

            $out[] = [
                'dbName' => $editCount['dbName'],
                'total' => $editCount['total'],
                'project' => $project,
            ];
        }

        return $out;
    }
80
81
    /**
     * Get a user's total edit count on one or more projects, as reported by the globaluserinfo API.
     * Requires the CentralAuth extension to be installed on the project.
     * @param User $user The user.
     * @return array|null Elements are arrays with 'dbName' (string), and 'total' (int). Null for logged out users.
     */
    protected function globalEditCountsFromCentralAuth(User $user): ?array
    {
        if (true === $user->isAnon()) {
            return null;
        }

        // Serve a cached result when there is one.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_globaleditcounts');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        $response = $this->executeApiRequest($this->caProject, [
            'meta' => 'globaluserinfo',
            'guiprop' => 'editcount|merged',
            'guiuser' => $user->getUsername(),
        ]);

        // No 'merged' list in the response: report no edit counts (this result is not cached).
        $mergedAccounts = $response['query']['globaluserinfo']['merged'] ?? null;
        if (null === $mergedAccounts) {
            return [];
        }

        $editCounts = [];
        foreach ($mergedAccounts as $account) {
            $editCounts[] = [
                'dbName' => $account['wiki'],
                'total' => $account['editcount'],
            ];
        }

        // Cache and return.
        return $this->setCache($cacheKey, $editCounts);
    }
119
120
    /**
     * Build a Project object for each of the given database names.
     * @param array $dbNames
     * @return Project[] Keyed by database name.
     */
    private function formatProjects(array $dbNames): array
    {
        // One lookup per database name, in the order given.
        $projects = array_map(
            fn ($dbName) => $this->projectRepo->getProject($dbName),
            $dbNames
        );

        // Re-key the Projects by the database name they were created from.
        return array_combine($dbNames, $projects);
    }
135
136
    /**
     * Get all Projects on which the user has made at least one edit.
     * @param User $user
     * @return Project[] Keyed by database name.
     */
    public function getProjectsWithEdits(User $user): array
    {
        // Anonymous users: every project with an actor row for them counts.
        if ($user->isAnon()) {
            $actorIdsByDbName = $this->getDbNamesAndActorIds($user);
            return $this->formatProjects(array_keys($actorIdsByDbName));
        }

        // Logged-in users: keep only the projects with a positive edit count.
        $dbNamesWithEdits = [];
        foreach ($this->globalEditCountsFromCentralAuth($user) as $editCount) {
            if ($editCount['total'] > 0) {
                $dbNamesWithEdits[] = $editCount['dbName'];
            }
        }

        return $this->formatProjects($dbNamesWithEdits);
    }
157
158
    /**
     * Get projects that the user has made at least one edit on, and the associated actor ID.
     * @param User $user
     * @param string[]|null $dbNames Loop over these projects instead of all of them.
     *   Null or an empty array means all projects.
     * @return array Keys are database names, values are actor IDs.
     */
    public function getDbNamesAndActorIds(User $user, ?array $dbNames = null): array
    {
        // Serve a cached result when there is one.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_db_names_actor_ids');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        if (!$dbNames) {
            $dbNames = array_column($this->caProject->getRepository()->getAll(), 'dbName');
        }

        // IP ranges are matched by prefix with LIKE; everything else by exact actor name.
        $isRange = $user->isIpRange();
        $actorParam = $isRange ? $user->getIpSubstringFromCidr().'%' : $user->getUsername();
        $actorCond = $isRange ? "actor_name LIKE :actor" : "actor_name = :actor";

        // Group one SELECT per project under the slice that hosts the project.
        $selectsBySlice = [];
        foreach ($dbNames as $dbName) {
            $slice = $this->getDbList()[$dbName];
            // actor_revision table only includes users who have made at least one edit.
            $actorTable = $this->getTableName($dbName, 'actor', 'revision');
            $selectsBySlice[$slice][] = "SELECT '$dbName' AS `dbName`, actor_id FROM $actorTable WHERE $actorCond";
        }

        // Run one UNIONed query per slice and collect the actor ID found for each project.
        $actorIds = [];
        foreach ($selectsBySlice as $slice => $selects) {
            $statement = $this->executeProjectsQuery(
                $slice,
                implode(' UNION ', $selects),
                ['actor' => $actorParam]
            );

            while ($row = $statement->fetchAssociative()) {
                $actorIds[$row['dbName']] = (int)$row['actor_id'];
            }
        }

        return $this->setCache($cacheKey, $actorIds);
    }
209
210
    /**
     * Get revisions by this user across the given Projects.
     * @param string[] $dbNames Database names of projects to iterate over.
     * @param User $user The user.
     * @param int|string $namespace Namespace ID or 'all' for all namespaces.
     * @param int|false $start Unix timestamp or false.
     * @param int|false $end Unix timestamp or false.
     * @param int $limit The maximum number of revisions to return (also applied to each slice's query).
     * @param int|false $offset Unix timestamp. Used for pagination.
     * @return array[] Newest first. Each row has the keys 'dbName', 'id', 'timestamp', 'unix_timestamp', 'minor',
     *   'deleted', 'length', 'length_change', 'parent_id', 'username', 'page_title', 'namespace', 'comment'
     *   and 'reverted'.
     */
    public function getRevisions(
        array $dbNames,
        User $user,
        $namespace = 'all',
        $start = false,
        $end = false,
        int $limit = 31, // One extra to know whether there should be another page.
        $offset = false
    ): array {
        // Check cache.
        $cacheKey = $this->getCacheKey(func_get_args(), 'gc_revisions');
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        // Just need any Connection to use the ->quote() method.
        $quoteConn = $this->getProjectsConnection('s1');

        // IP ranges select on the ip_changes hex bounds and report each row's own actor name.
        // Everyone else selects on the per-project actor ID and reports the (quoted) username literal.
        $isIpRange = $user->isIpRange();
        $ipRangeCond = '';
        if ($isIpRange) {
            [$startIp, $endIp] = IPUtils::parseRange($user->getUsername());
            $ipRangeCond = 'ipc_hex BETWEEN '.$quoteConn->quote($startIp, PDO::PARAM_STR)
                .' AND '.$quoteConn->quote($endIp, PDO::PARAM_STR);
            $usernameExpr = 'actor_name';
        } else {
            $usernameExpr = $quoteConn->quote($user->getUsername(), PDO::PARAM_STR);
        }

        // Fetch actor IDs; projects without one for this user are skipped below.
        $actorIds = $this->getDbNamesAndActorIds($user, $dbNames);

        if (!$actorIds) {
            return [];
        }

        $namespaceCond = 'all' === $namespace
            ? ''
            : 'AND page_namespace = '.(int)$namespace;
        $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.', 'rev_timestamp');

        // Assemble queries, grouped by the slice hosting each project.
        // The slice map does not depend on the loop variable, so it is looked up once.
        $dbList = $this->getDbList();
        $queriesBySlice = [];
        foreach ($dbNames as $dbName) {
            if (!isset($actorIds[$dbName])) {
                continue;
            }

            $whereClause = $isIpRange
                ? $ipRangeCond
                : 'revs.rev_actor = '.$actorIds[$dbName];

            $queriesBySlice[$dbList[$dbName]][] = $this->buildProjectRevisionsSql(
                $dbName,
                $usernameExpr,
                $isIpRange,
                $whereClause,
                $namespaceCond,
                $revDateConditions
            );
        }

        // Re-assemble into UNIONed queries, executing as many per slice as possible.
        $revisions = [];
        foreach ($queriesBySlice as $slice => $queries) {
            $sql = "SELECT * FROM ((\n" . join("\n) UNION (\n", $queries) . ")) a ORDER BY timestamp DESC LIMIT $limit";
            $revisions = array_merge($revisions, $this->executeProjectsQuery($slice, $sql)->fetchAllAssociative());
        }

        // Each slice's rows are ordered by its own query, but concatenating several slices is not.
        // Re-sort whenever more than one slice contributed (not only when over $limit), so the
        // result is always newest-first, then truncate to $limit.
        if (count($queriesBySlice) > 1) {
            usort(
                $revisions,
                static fn (array $a, array $b): int => $b['unix_timestamp'] <=> $a['unix_timestamp']
            );
            $revisions = array_slice($revisions, 0, $limit);
        }

        // Cache and return.
        return $this->setCache($cacheKey, $revisions);
    }

    /**
     * Build the SELECT statement that fetches one project's revisions for getRevisions().
     * @param string $dbName Database name of the project.
     * @param string $usernameExpr SQL expression selected as `username` (a quoted literal or a column name).
     * @param bool $joinIpChanges Whether to join the project's ip_changes table (needed for IP range conditions).
     * @param string $whereClause Condition selecting the user's revisions; must already be safe SQL.
     * @param string $namespaceCond Extra 'AND ...' namespace condition, or an empty string.
     * @param string $revDateConditions Extra date conditions, or an empty string.
     * @return string
     */
    private function buildProjectRevisionsSql(
        string $dbName,
        string $usernameExpr,
        bool $joinIpChanges,
        string $whereClause,
        string $namespaceCond,
        string $revDateConditions
    ): string {
        $projectRepo = $this->caProject->getRepository();
        $revisionTable = $projectRepo->getTableName($dbName, 'revision');
        $pageTable = $projectRepo->getTableName($dbName, 'page');
        $commentTable = $projectRepo->getTableName($dbName, 'comment', 'revision');
        $actorTable = $projectRepo->getTableName($dbName, 'actor', 'revision');
        $tagTable = $projectRepo->getTableName($dbName, 'change_tag');
        $tagDefTable = $projectRepo->getTableName($dbName, 'change_tag_def');

        $ipcJoin = '';
        if ($joinIpChanges) {
            $ipcTable = $projectRepo->getTableName($dbName, 'ip_changes');
            $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
        }

        return "
            SELECT
                '$dbName' AS dbName,
                revs.rev_id AS id,
                revs.rev_timestamp AS `timestamp`,
                UNIX_TIMESTAMP(revs.rev_timestamp) AS `unix_timestamp`,
                revs.rev_minor_edit AS `minor`,
                revs.rev_deleted AS `deleted`,
                revs.rev_len AS `length`,
                (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS `length_change`,
                revs.rev_parent_id AS `parent_id`,
                $usernameExpr AS `username`,
                page.page_title,
                page.page_namespace AS `namespace`,
                comment_text AS `comment`,
                (
                    SELECT 1
                    FROM $tagTable
                    WHERE ct_rev_id = revs.rev_id
                    AND ct_tag_id = (
                        SELECT ctd_id
                        FROM $tagDefTable
                        WHERE ctd_name = 'mw-reverted'
                    )
                    LIMIT 1
                ) AS reverted
            FROM $revisionTable AS revs
                $ipcJoin
                JOIN $pageTable AS page ON (rev_page = page_id)
                JOIN $actorTable ON (actor_id = revs.rev_actor)
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
                LEFT OUTER JOIN $commentTable ON revs.rev_comment_id = comment_id
            WHERE $whereClause
                $namespaceCond
                $revDateConditions";
    }
345
}
346