AutoEditsRepository::getAutomatedEdits()   B
last analyzed

Complexity

Conditions 8
Paths 33

Size

Total Lines 81
Code Lines 42

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 42
nc 33
nop 8
dl 0
loc 81
rs 8.0035
c 0
b 0
f 0

How to fix   Long Method    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Helper\AutomatedEditsHelper;
8
use App\Model\Project;
9
use App\Model\User;
10
use Doctrine\Persistence\ManagerRegistry;
11
use GuzzleHttp\Client;
12
use PDO;
13
use Psr\Cache\CacheItemPoolInterface;
14
use Psr\Log\LoggerInterface;
15
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
16
use Symfony\Component\HttpFoundation\RequestStack;
17
use Wikimedia\IPUtils;
18
19
/**
20
 * AutoEditsRepository is responsible for retrieving data from the database
21
 * about the automated edits made by a user.
22
 * @codeCoverageIgnore
23
 */
24
class AutoEditsRepository extends UserRepository
25
{
26
    protected AutomatedEditsHelper $autoEditsHelper;
27
28
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
29
    private array $aeTools;
30
31
    /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */
32
    private bool $useSandbox = false;
33
34
    /** @var array Process cache for tags/IDs. */
35
    private array $tags;
36
37
    /**
38
     * @param ManagerRegistry $managerRegistry
39
     * @param CacheItemPoolInterface $cache
40
     * @param Client $guzzle
41
     * @param LoggerInterface $logger
42
     * @param ParameterBagInterface $parameterBag
43
     * @param bool $isWMF
44
     * @param int $queryTimeout
45
     * @param ProjectRepository $projectRepo
46
     * @param AutomatedEditsHelper $autoEditsHelper
47
     * @param RequestStack $requestStack
48
     */
49
    public function __construct(
50
        ManagerRegistry $managerRegistry,
51
        CacheItemPoolInterface $cache,
52
        Client $guzzle,
53
        LoggerInterface $logger,
54
        ParameterBagInterface $parameterBag,
55
        bool $isWMF,
56
        int $queryTimeout,
57
        ProjectRepository $projectRepo,
58
        AutomatedEditsHelper $autoEditsHelper,
59
        RequestStack $requestStack
60
    ) {
61
        $this->autoEditsHelper = $autoEditsHelper;
62
        parent::__construct(
63
            $managerRegistry,
64
            $cache,
65
            $guzzle,
66
            $logger,
67
            $parameterBag,
68
            $isWMF,
69
            $queryTimeout,
70
            $projectRepo,
71
            $requestStack
72
        );
73
    }
74
75
    /**
76
     * @param bool $useSandbox
77
     * @return AutoEditsRepository
78
     */
79
    public function setUseSandbox(bool $useSandbox): AutoEditsRepository
80
    {
81
        $this->useSandbox = $useSandbox;
82
        return $this;
83
    }
84
85
    /**
86
     * @return bool
87
     */
88
    public function getUseSandbox(): bool
89
    {
90
        return $this->useSandbox;
91
    }
92
93
    /**
94
     * Method to give the repository access to the AutomatedEditsHelper and fetch the list of semi-automated tools.
95
     * @param Project $project
96
     * @param int|string $namespace Namespace ID or 'all'.
97
     * @return array
98
     */
99
    public function getTools(Project $project, $namespace = 'all'): array
100
    {
101
        if (!isset($this->aeTools)) {
102
            $this->aeTools = $this->autoEditsHelper->getTools($project, $this->useSandbox);
103
        }
104
105
        if ('all' !== $namespace) {
106
            // Limit by namespace.
107
            return array_filter($this->aeTools, function (array $tool) use ($namespace) {
108
                return empty($tool['namespaces']) ||
109
                    in_array((int)$namespace, $tool['namespaces']) ||
110
                    (
111
                        1 === $namespace % 2 &&
112
                        isset($tool['talk_namespaces'])
113
                    );
114
            });
115
        }
116
117
        return $this->aeTools;
118
    }
119
120
    /**
121
     * Get tools that were misconfigured, also removing them from $this->aeTools.
122
     * @param Project $project
123
     * @return string[] Labels for the invalid tools.
124
     */
125
    public function getInvalidTools(Project $project): array
126
    {
127
        $tools = $this->getTools($project);
128
        $invalidTools = $tools['invalid'] ?? [];
129
        unset($this->aeTools['invalid']);
130
        return $invalidTools;
131
    }
132
133
    /**
134
     * Overrides Repository::setCache(), and will not call the parent (which sets the cache) if using the sandbox.
135
     * @inheritDoc
136
     */
137
    public function setCache(string $cacheKey, $value, $duration = 'PT20M')
138
    {
139
        if ($this->useSandbox) {
140
            return $value;
141
        }
142
143
        return parent::setCache($cacheKey, $value, $duration);
144
    }
145
146
    /**
147
     * Get the number of edits this user made using semi-automated tools.
148
     * @param Project $project
149
     * @param User $user
150
     * @param string|int $namespace Namespace ID or 'all'
151
     * @param int|false $start Start date as Unix timestamp.
152
     * @param int|false $end End date as Unix timestamp.
153
     * @return int Result of query, see below.
154
     */
155
    public function countAutomatedEdits(
156
        Project $project,
157
        User $user,
158
        $namespace = 'all',
159
        $start = false,
160
        $end = false
161
    ): int {
162
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
163
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
164
            return $this->cache->getItem($cacheKey)->get();
165
        }
166
167
        $revDateConditions = $this->getDateConditions($start, $end);
168
169
        // Get the combined regex and tags for the tools
170
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
171
172
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
173
174
        $revisionTable = $project->getTableName('revision');
175
        $ipcTable = $project->getTableName('ip_changes');
176
        $commentTable = $project->getTableName('comment', 'revision');
177
        $tagTable = $project->getTableName('change_tag');
178
        $commentJoin = '';
179
        $tagJoin = '';
180
181
        $params = [];
182
183
        // IP range handling.
184
        $ipcJoin = '';
185
        $whereClause = 'rev_actor = :actorId';
186
        if ($user->isIpRange()) {
187
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
188
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
189
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
190
        }
191
192
        // Build SQL for detecting AutoEdits via regex and/or tags.
193
        $condTools = [];
194
        if ('' != $regex) {
195
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
196
            $condTools[] = "comment_text REGEXP :tools";
197
            $params['tools'] = $regex;
198
        }
199
        if ('' != $tagIds) {
200
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
201
            $condTools[] = "ct_tag_id IN ($tagIds)";
202
        }
203
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
204
205
        $sql = "SELECT COUNT(DISTINCT(rev_id))
206
                FROM $revisionTable
207
                $ipcJoin
208
                $pageJoin
209
                $commentJoin
210
                $tagJoin
211
                WHERE $whereClause
212
                $condNamespace
213
                $condTool
214
                $revDateConditions";
215
216
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
217
        $result = (int)$resultQuery->fetchOne();
218
219
        // Cache and return.
220
        return $this->setCache($cacheKey, $result);
221
    }
222
223
    /**
224
     * Get non-automated contributions for the given user.
225
     * @param Project $project
226
     * @param User $user
227
     * @param string|int $namespace Namespace ID or 'all'.
228
     * @param int|false $start Start date as Unix timestamp.
229
     * @param int|false $end End date as Unix timestamp.
230
     * @param int|false $offset Unix timestamp. Used for pagination.
231
     * @param int $limit Number of results to return.
232
     * @return string[] Result of query, with columns 'page_title', 'namespace', 'rev_id', 'timestamp', 'minor',
233
     *   'length', 'length_change', 'comment'.
234
     */
235
    public function getNonAutomatedEdits(
236
        Project $project,
237
        User $user,
238
        $namespace = 'all',
239
        $start = false,
240
        $end = false,
241
        $offset = false,
242
        int $limit = 50
243
    ): array {
244
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
245
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
246
            return $this->cache->getItem($cacheKey)->get();
247
        }
248
249
        $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.');
250
251
        // Get the combined regex and tags for the tools
252
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
253
254
        $pageTable = $project->getTableName('page');
255
        $revisionTable = $project->getTableName('revision');
256
        $ipcTable = $project->getTableName('ip_changes');
257
        $commentTable = $project->getTableName('comment', 'revision');
258
        $tagTable = $project->getTableName('change_tag');
259
260
        // IP range handling.
261
        $ipcJoin = '';
262
        $whereClause = 'revs.rev_actor = :actorId';
263
        $params = ['tools' => $regex];
264
        if ($user->isIpRange()) {
265
            $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
266
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
267
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
268
        }
269
270
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
271
        $condTag = '' != $tagIds ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
272
            WHERE ct_rev_id = revs.rev_id AND ct_tag_id IN ($tagIds))" : '';
273
274
        $sql = "SELECT
275
                    page_title,
276
                    page_namespace AS `namespace`,
277
                    revs.rev_id AS rev_id,
278
                    revs.rev_timestamp AS `timestamp`,
279
                    revs.rev_minor_edit AS `minor`,
280
                    revs.rev_len AS `length`,
281
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS `length_change`,
282
                    comment_text AS `comment`
283
                FROM $pageTable
284
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
285
                $ipcJoin
286
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
287
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
288
                WHERE $whereClause
289
                AND revs.rev_timestamp > 0
290
                AND comment_text NOT RLIKE :tools
291
                $condTag
292
                $revDateConditions
293
                $condNamespace
294
                GROUP BY revs.rev_id
295
                ORDER BY revs.rev_timestamp DESC
296
                LIMIT $limit";
297
298
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
299
        $result = $resultQuery->fetchAllAssociative();
300
301
        // Cache and return.
302
        return $this->setCache($cacheKey, $result);
303
    }
304
305
    /**
306
     * Get (semi-)automated contributions for the given user, and optionally for a given tool.
307
     * @param Project $project
308
     * @param User $user
309
     * @param string|int $namespace Namespace ID or 'all'.
310
     * @param int|false $start Start date as Unix timestamp.
311
     * @param int|false $end End date as Unix timestamp.
312
     * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config.
313
     * @param int|false $offset Unix timestamp. Used for pagination.
314
     * @param int $limit Number of results to return.
315
     * @return string[] Result of query, with columns 'page_title', 'namespace', 'rev_id', 'timestamp', 'minor',
316
     *   'length', 'length_change', 'comment'.
317
     */
318
    public function getAutomatedEdits(
319
        Project $project,
320
        User $user,
321
        $namespace = 'all',
322
        $start = false,
323
        $end = false,
324
        ?string $tool = null,
325
        $offset = false,
326
        int $limit = 50
327
    ): array {
328
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
329
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
330
            return $this->cache->getItem($cacheKey)->get();
331
        }
332
333
        $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.');
334
335
        // In this case there is a slight performance improvement we can make if we're not given a start date.
336
        if ('' === $revDateConditions) {
337
            $revDateConditions = 'AND revs.rev_timestamp > 0';
338
        }
339
340
        // Get the combined regex and tags for the tools
341
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);
342
343
        $pageTable = $project->getTableName('page');
344
        $revisionTable = $project->getTableName('revision');
345
        $ipcTable = $project->getTableName('ip_changes');
346
        $commentTable = $project->getTableName('comment', 'revision');
347
        $tagTable = $project->getTableName('change_tag');
348
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
349
        $tagJoin = '';
350
        $condsTool = [];
351
352
        if ('' != $regex) {
353
            $condsTool[] = 'comment_text RLIKE :tools';
354
        }
355
356
        if ('' != $tagIds) {
357
            $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
358
            $condsTool[] = "ct_tag_id IN ($tagIds)";
359
        }
360
361
        // IP range handling.
362
        $ipcJoin = '';
363
        $whereClause = 'revs.rev_actor = :actorId';
364
        $params = ['tools' => $regex];
365
        if ($user->isIpRange()) {
366
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
367
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
368
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
369
        }
370
371
        $sql = "SELECT
372
                    page_title,
373
                    page_namespace AS `namespace`,
374
                    revs.rev_id AS `rev_id`,
375
                    revs.rev_timestamp AS `timestamp`,
376
                    revs.rev_minor_edit AS `minor`,
377
                    revs.rev_len AS `length`,
378
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS `length_change`,
379
                    comment_text AS `comment`
380
                FROM $pageTable
381
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
382
                $ipcJoin
383
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
384
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
385
                $tagJoin
386
                WHERE $whereClause
387
                $revDateConditions
388
                $condNamespace
389
                AND (".implode(' OR ', $condsTool).")
390
                GROUP BY revs.rev_id
391
                ORDER BY revs.rev_timestamp DESC
392
                LIMIT $limit";
393
394
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
395
        $result = $resultQuery->fetchAllAssociative();
396
397
        // Cache and return.
398
        return $this->setCache($cacheKey, $result);
399
    }
400
401
    /**
402
     * Get counts of known automated tools used by the given user.
403
     * @param Project $project
404
     * @param User $user
405
     * @param string|int $namespace Namespace ID or 'all'.
406
     * @param int|false $start Start date as Unix timestamp.
407
     * @param int|false $end End date as Unix timestamp.
408
     * @return string[] Each tool that they used along with the count and link:
409
     *                  [
410
     *                      'Twinkle' => [
411
     *                          'count' => 50,
412
     *                          'link' => 'Wikipedia:Twinkle',
413
     *                      ],
414
     *                  ]
415
     */
416
    public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = false, $end = false): array
417
    {
418
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
419
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
420
            return $this->cache->getItem($cacheKey)->get();
421
        }
422
423
        $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end);
424
        $params = [];
425
        if ($user->isIpRange()) {
426
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
427
        }
428
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
429
430
        $tools = $this->getTools($project, $namespace);
431
432
        // handling results
433
        $results = [];
434
435
        while ($row = $resultQuery->fetchAssociative()) {
436
            // Only track tools that they've used at least once
437
            $tool = $row['toolname'];
438
            if ($row['count'] > 0) {
439
                $results[$tool] = [
440
                    'link' => $tools[$tool]['link'],
441
                    'label' => $tools[$tool]['label'] ?? $tool,
442
                    'count' => $row['count'],
443
                ];
444
            }
445
        }
446
447
        // Sort the array by count
448
        uasort($results, function ($a, $b) {
449
            return $b['count'] - $a['count'];
450
        });
451
452
        // Cache and return.
453
        return $this->setCache($cacheKey, $results);
454
    }
455
456
    /**
457
     * Get SQL for getting counts of known automated tools used by the user.
458
     * @see self::getAutomatedCounts()
459
     * @param Project $project
460
     * @param User $user
461
     * @param string|int $namespace Namespace ID or 'all'.
462
     * @param int|false $start Start date as Unix timestamp.
463
     * @param int|false $end End date as Unix timestamp.
464
     * @return string The SQL.
465
     */
466
    private function getAutomatedCountsSql(
467
        Project $project,
468
        User $user,
469
        $namespace,
470
        $start = false,
471
        $end = false
472
    ): string {
473
        $revDateConditions = $this->getDateConditions($start, $end);
474
475
        // Load the semi-automated edit types.
476
        $tools = $this->getTools($project, $namespace);
477
478
        // Create a collection of queries that we're going to run.
479
        $queries = [];
480
481
        $revisionTable = $project->getTableName('revision');
482
        $ipcTable = $project->getTableName('ip_changes');
483
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
484
        $conn = $this->getProjectsConnection($project);
485
486
        // IP range handling.
487
        $ipcJoin = '';
488
        $whereClause = 'rev_actor = :actorId';
489
        if ($user->isIpRange()) {
490
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
491
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
492
        }
493
494
        foreach ($tools as $toolName => $values) {
495
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
496
497
            $toolName = $conn->quote($toolName, PDO::PARAM_STR);
498
499
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
500
            // configuration, but no local tag exists on the said project.
501
            if ('' === $condTool) {
502
                continue;
503
            }
504
505
            $queries[] .= "
506
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
507
                FROM $revisionTable
508
                $ipcJoin
509
                $pageJoin
510
                $commentJoin
511
                $tagJoin
512
                WHERE $whereClause
513
                AND $condTool
514
                $condNamespace
515
                $revDateConditions";
516
        }
517
518
        // Combine to one big query.
519
        return implode(' UNION ', $queries);
520
    }
521
522
    /**
523
     * Get some of the inner SQL for self::getAutomatedCountsSql().
524
     * @param Project $project
525
     * @param string $toolName
526
     * @param string[] $values Values as defined in the AutoEdits config.
527
     * @return string[] [Equality clause, JOIN clause]
528
     */
529
    protected function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
530
    {
531
        $conn = $this->getProjectsConnection($project);
532
        $commentJoin = '';
533
        $tagJoin = '';
534
        $condTool = '';
535
536
        if (isset($values['regex'])) {
537
            $commentTable = $project->getTableName('comment', 'revision');
538
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
539
            $regex = $conn->quote($values['regex'], PDO::PARAM_STR);
540
            $condTool = "comment_text REGEXP $regex";
541
        }
542
        if (isset($values['tags'])) {
543
            $tagIds = $this->getTagIdsFromNames($project, $values['tags']);
0 ignored issues
show
Bug introduced by
$values['tags'] of type string is incompatible with the type array expected by parameter $tagNames of App\Repository\AutoEdits...y::getTagIdsFromNames(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

543
            $tagIds = $this->getTagIdsFromNames($project, /** @scrutinizer ignore-type */ $values['tags']);
Loading history...
544
545
            if ($tagIds) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $tagIds of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
546
                $tagTable = $project->getTableName('change_tag');
547
                $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
548
                $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds);
549
550
                // Use tags in addition to the regex clause, if already present.
551
                // Tags are more reliable but may not be present for edits made with
552
                // older versions of the tool, before it started adding tags.
553
                if ('' === $condTool) {
554
                    $condTool = $tagClause;
555
                } else {
556
                    $condTool = "($condTool OR $tagClause)";
557
                }
558
            }
559
        }
560
561
        return [$condTool, $commentJoin, $tagJoin];
562
    }
563
564
    /**
565
     * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query.
566
     * @param Project $project
567
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
568
     * @param string|null $tool
569
     * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces.
570
     * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a
571
     *   comma-separated list of the tag IDs, ready to be used in SQL.
572
     */
573
    private function getToolRegexAndTags(
574
        Project $project,
575
        bool $nonAutoEdits = false,
576
        ?string $tool = null,
577
        $namespace = null
578
    ): array {
579
        $tools = $this->getTools($project);
580
        $regexes = [];
581
        $tagIds = [];
582
583
        if ('' != $tool) {
584
            $tools = [$tools[$tool]];
585
        }
586
587
        foreach (array_values($tools) as $values) {
588
            if ($nonAutoEdits && isset($values['contribs'])) {
589
                continue;
590
            }
591
592
            if (is_numeric($namespace) &&
593
                !empty($values['namespaces']) &&
594
                !in_array((int)$namespace, $values['namespaces'])
595
            ) {
596
                continue;
597
            }
598
599
            if (isset($values['regex'])) {
600
                $regexes[] = $values['regex'];
601
            }
602
            if (isset($values['tags'])) {
603
                $tagIds = array_merge($tagIds, $this->getTagIdsFromNames($project, $values['tags']));
604
            }
605
        }
606
607
        return [
608
            implode('|', $regexes),
609
            implode(',', $tagIds),
610
        ];
611
    }
612
613
    /**
614
     * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above.
615
     * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query.
616
     * @param Project $project
617
     * @return int[] Keys are the tag name, values are the IDs.
618
     */
619
    public function getTags(Project $project): array
620
    {
621
        // Use process cache; ensures we don't needlessly re-query for tag IDs
622
        // during the same request when using the ?usesandbox=1 option.
623
        if (isset($this->tags)) {
624
            return $this->tags;
625
        }
626
627
        $cacheKey = $this->getCacheKey(func_get_args(), 'ae_tag_ids');
628
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
629
            return $this->cache->getItem($cacheKey)->get();
630
        }
631
632
        $conn = $this->getProjectsConnection($project);
633
634
        // Get all tag values.
635
        $tags = [];
636
        foreach (array_values($this->getTools($project)) as $values) {
637
            if (isset($values['tags'])) {
638
                $tags = array_merge(
639
                    $tags,
640
                    array_map(function ($tag) use ($conn) {
641
                        return $conn->quote($tag, PDO::PARAM_STR);
642
                    }, $values['tags'])
643
                );
644
            }
645
        }
646
647
        $tags = implode(',', $tags);
648
        $tagDefTable = $project->getTableName('change_tag_def');
649
        $sql = "SELECT ctd_name, ctd_id FROM $tagDefTable
650
                WHERE ctd_name IN ($tags)";
651
        $this->tags = $this->executeProjectsQuery($project, $sql)->fetchAllKeyValue();
652
653
        // Cache and return.
654
        return $this->setCache($cacheKey, $this->tags);
655
    }
656
657
    /**
658
     * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
659
     * For instance, Huggle edits are also tagged as Rollback, but when viewing
660
     * Rollback edits we don't want to show Huggle edits.
661
     * @param Project $project
662
     * @param string $tool
663
     * @param array $tagIds
664
     * @return string
665
     */
666
    private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
667
    {
668
        $tagsList = implode(',', $tagIds);
669
        $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];
670
        $excludesSql = '';
671
672
        if ($tagExcludes && 1 === count($tagIds)) {
673
            // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
674
            $excludesList = implode(',', array_filter(array_map(function ($tagName) use ($project) {
675
                return $this->getTags($project)[$tagName] ?? null;
676
            }, $tagExcludes)));
677
678
            if (strlen($excludesList)) {
679
                $excludesSql = "AND ct_tag_id NOT IN ($excludesList)";
680
            }
681
        }
682
683
        return "ct_tag_id IN ($tagsList) $excludesSql";
684
    }
685
686
    /**
687
     * Get IDs for tags given the names.
688
     * @param Project $project
689
     * @param array $tagNames
690
     * @return array
691
     */
692
    private function getTagIdsFromNames(Project $project, array $tagNames): array
693
    {
694
        $allTagIds = $this->getTags($project);
695
        $tagIds = [];
696
697
        foreach ($tagNames as $tag) {
698
            if (isset($allTagIds[$tag])) {
699
                $tagIds[] = $allTagIds[$tag];
700
            }
701
        }
702
703
        return $tagIds;
704
    }
705
}
706