AutoEditsRepository::getToolCounts()   B
last analyzed

Complexity

Conditions 6
Paths 7

Size

Total Lines 38
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 20
nc 7
nop 5
dl 0
loc 38
rs 8.9777
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Helper\AutomatedEditsHelper;
8
use App\Model\Project;
9
use App\Model\User;
10
use Doctrine\Persistence\ManagerRegistry;
11
use GuzzleHttp\Client;
12
use PDO;
13
use Psr\Cache\CacheItemPoolInterface;
14
use Psr\Log\LoggerInterface;
15
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
16
use Symfony\Component\HttpFoundation\RequestStack;
17
use Wikimedia\IPUtils;
18
19
/**
20
 * AutoEditsRepository is responsible for retrieving data from the database
21
 * about the automated edits made by a user.
22
 * @codeCoverageIgnore
23
 */
24
class AutoEditsRepository extends UserRepository
25
{
26
    protected AutomatedEditsHelper $autoEditsHelper;
27
28
    /** @var array List of automated tools, used for fetching the tool list and filtering it. */
29
    private array $aeTools;
30
31
    /** @var bool Whether to use the /sandbox version of the config, bypassing caching. */
32
    private bool $useSandbox = false;
33
34
    /** @var array Process cache for tags/IDs. */
35
    private array $tags;
36
37
    /**
38
     * @param ManagerRegistry $managerRegistry
39
     * @param CacheItemPoolInterface $cache
40
     * @param Client $guzzle
41
     * @param LoggerInterface $logger
42
     * @param ParameterBagInterface $parameterBag
43
     * @param bool $isWMF
44
     * @param int $queryTimeout
45
     * @param ProjectRepository $projectRepo
46
     * @param AutomatedEditsHelper $autoEditsHelper
47
     * @param RequestStack $requestStack
48
     */
49
    public function __construct(
50
        ManagerRegistry $managerRegistry,
51
        CacheItemPoolInterface $cache,
52
        Client $guzzle,
53
        LoggerInterface $logger,
54
        ParameterBagInterface $parameterBag,
55
        bool $isWMF,
56
        int $queryTimeout,
57
        ProjectRepository $projectRepo,
58
        AutomatedEditsHelper $autoEditsHelper,
59
        RequestStack $requestStack
60
    ) {
61
        $this->autoEditsHelper = $autoEditsHelper;
62
        parent::__construct(
63
            $managerRegistry,
64
            $cache,
65
            $guzzle,
66
            $logger,
67
            $parameterBag,
68
            $isWMF,
69
            $queryTimeout,
70
            $projectRepo,
71
            $requestStack
72
        );
73
    }
74
75
    /**
76
     * @param bool $useSandbox
77
     * @return AutoEditsRepository
78
     */
79
    public function setUseSandbox(bool $useSandbox): AutoEditsRepository
80
    {
81
        $this->useSandbox = $useSandbox;
82
        return $this;
83
    }
84
85
    /**
86
     * @return bool
87
     */
88
    public function getUseSandbox(): bool
89
    {
90
        return $this->useSandbox;
91
    }
92
93
    /**
94
     * Method to give the repository access to the AutomatedEditsHelper and fetch the list of semi-automated tools.
95
     * @param Project $project
96
     * @param int|string $namespace Namespace ID or 'all'.
97
     * @return array
98
     */
99
    public function getTools(Project $project, $namespace = 'all'): array
100
    {
101
        if (!isset($this->aeTools)) {
102
            $this->aeTools = $this->autoEditsHelper->getTools($project, $this->useSandbox);
103
        }
104
105
        if ('all' !== $namespace) {
106
            // Limit by namespace.
107
            return array_filter($this->aeTools, function (array $tool) use ($namespace) {
108
                return empty($tool['namespaces']) ||
109
                    in_array((int)$namespace, $tool['namespaces']) ||
110
                    (
111
                        1 === $namespace % 2 &&
112
                        isset($tool['talk_namespaces'])
113
                    );
114
            });
115
        }
116
117
        return $this->aeTools;
118
    }
119
120
    /**
121
     * Get tools that were misconfigured, also removing them from $this->aeTools.
122
     * @param Project $project
123
     * @return string[] Labels for the invalid tools.
124
     */
125
    public function getInvalidTools(Project $project): array
126
    {
127
        $tools = $this->getTools($project);
128
        $invalidTools = $tools['invalid'] ?? [];
129
        unset($this->aeTools['invalid']);
130
        return $invalidTools;
131
    }
132
133
    /**
134
     * Overrides Repository::setCache(), and will not call the parent (which sets the cache) if using the sandbox.
135
     * @inheritDoc
136
     */
137
    public function setCache(string $cacheKey, $value, $duration = 'PT20M')
138
    {
139
        if ($this->useSandbox) {
140
            return $value;
141
        }
142
143
        return parent::setCache($cacheKey, $value, $duration);
144
    }
145
146
    /**
147
     * Get the number of edits this user made using semi-automated tools.
148
     * @param Project $project
149
     * @param User $user
150
     * @param string|int $namespace Namespace ID or 'all'
151
     * @param int|false $start Start date as Unix timestamp.
152
     * @param int|false $end End date as Unix timestamp.
153
     * @return int Result of query, see below.
154
     */
155
    public function countAutomatedEdits(
156
        Project $project,
157
        User $user,
158
        $namespace = 'all',
159
        $start = false,
160
        $end = false
161
    ): int {
162
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoeditcount');
163
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
164
            return $this->cache->getItem($cacheKey)->get();
165
        }
166
167
        $revDateConditions = $this->getDateConditions($start, $end);
168
169
        // Get the combined regex and tags for the tools
170
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
171
172
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
173
174
        $revisionTable = $project->getTableName('revision');
175
        $ipcTable = $project->getTableName('ip_changes');
176
        $commentTable = $project->getTableName('comment', 'revision');
177
        $tagTable = $project->getTableName('change_tag');
178
        $commentJoin = '';
179
        $tagJoin = '';
180
181
        $params = [];
182
183
        // IP range handling.
184
        $ipcJoin = '';
185
        $whereClause = 'rev_actor = :actorId';
186
        if ($user->isIpRange()) {
187
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
188
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
189
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
190
        }
191
192
        // Build SQL for detecting AutoEdits via regex and/or tags.
193
        $condTools = [];
194
        if ('' != $regex) {
195
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
196
            $condTools[] = "comment_text REGEXP :tools";
197
            $params['tools'] = $regex;
198
        }
199
        if ('' != $tagIds) {
200
            $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
201
            $condTools[] = "ct_tag_id IN ($tagIds)";
202
        }
203
        $condTool = 'AND (' . implode(' OR ', $condTools) . ')';
204
205
        $sql = "SELECT COUNT(DISTINCT(rev_id))
206
                FROM $revisionTable
207
                $ipcJoin
208
                $pageJoin
209
                $commentJoin
210
                $tagJoin
211
                WHERE $whereClause
212
                $condNamespace
213
                $condTool
214
                $revDateConditions";
215
216
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
217
        $result = (int)$resultQuery->fetchOne();
218
219
        // Cache and return.
220
        return $this->setCache($cacheKey, $result);
221
    }
222
223
    /**
224
     * Get non-automated contributions for the given user.
225
     * @param Project $project
226
     * @param User $user
227
     * @param string|int $namespace Namespace ID or 'all'.
228
     * @param int|false $start Start date as Unix timestamp.
229
     * @param int|false $end End date as Unix timestamp.
230
     * @param int|false $offset Unix timestamp. Used for pagination.
231
     * @param int $limit Number of results to return.
232
     * @return string[] Result of query, with columns 'page_title', 'namespace', 'rev_id', 'timestamp', 'minor',
233
     *   'length', 'length_change', 'comment'.
234
     */
235
    public function getNonAutomatedEdits(
236
        Project $project,
237
        User $user,
238
        $namespace = 'all',
239
        $start = false,
240
        $end = false,
241
        $offset = false,
242
        int $limit = 50
243
    ): array {
244
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_nonautoedits');
245
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
246
            return $this->cache->getItem($cacheKey)->get();
247
        }
248
249
        $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.');
250
251
        // Get the combined regex and tags for the tools
252
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, null, $namespace);
253
254
        $pageTable = $project->getTableName('page');
255
        $revisionTable = $project->getTableName('revision');
256
        $ipcTable = $project->getTableName('ip_changes');
257
        $commentTable = $project->getTableName('comment', 'revision');
258
        $tagTable = $project->getTableName('change_tag');
259
260
        // IP range handling.
261
        $ipcJoin = '';
262
        $whereClause = 'revs.rev_actor = :actorId';
263
        $params = ['tools' => $regex];
264
        if ($user->isIpRange()) {
265
            $ipcJoin = "JOIN $ipcTable ON revs.rev_id = ipc_rev_id";
266
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
267
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
268
        }
269
270
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
271
        $condTag = '' != $tagIds ? "AND NOT EXISTS (SELECT 1 FROM $tagTable
272
            WHERE ct_rev_id = revs.rev_id AND ct_tag_id IN ($tagIds))" : '';
273
274
        $sql = "SELECT
275
                    page_title,
276
                    page_namespace AS `namespace`,
277
                    revs.rev_id AS rev_id,
278
                    revs.rev_timestamp AS `timestamp`,
279
                    revs.rev_minor_edit AS `minor`,
280
                    revs.rev_len AS `length`,
281
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS `length_change`,
282
                    comment_text AS `comment`
283
                FROM $pageTable
284
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
285
                $ipcJoin
286
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
287
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
288
                WHERE $whereClause
289
                AND revs.rev_timestamp > 0
290
                AND comment_text NOT RLIKE :tools
291
                $condTag
292
                $revDateConditions
293
                $condNamespace
294
                GROUP BY revs.rev_id
295
                ORDER BY revs.rev_timestamp DESC
296
                LIMIT $limit";
297
298
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
299
        $result = $resultQuery->fetchAllAssociative();
300
301
        // Cache and return.
302
        return $this->setCache($cacheKey, $result);
303
    }
304
305
    /**
306
     * Get (semi-)automated contributions for the given user, and optionally for a given tool.
307
     * @param Project $project
308
     * @param User $user
309
     * @param string|int $namespace Namespace ID or 'all'.
310
     * @param int|false $start Start date as Unix timestamp.
311
     * @param int|false $end End date as Unix timestamp.
312
     * @param string|null $tool Only get edits made with this tool. Must match the keys in the AutoEdits config.
313
     * @param int|false $offset Unix timestamp. Used for pagination.
314
     * @param int $limit Number of results to return.
315
     * @return string[] Result of query, with columns 'page_title', 'namespace', 'rev_id', 'timestamp', 'minor',
316
     *   'length', 'length_change', 'comment'.
317
     */
318
    public function getAutomatedEdits(
319
        Project $project,
320
        User $user,
321
        $namespace = 'all',
322
        $start = false,
323
        $end = false,
324
        ?string $tool = null,
325
        $offset = false,
326
        int $limit = 50
327
    ): array {
328
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autoedits');
329
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
330
            return $this->cache->getItem($cacheKey)->get();
331
        }
332
333
        $revDateConditions = $this->getDateConditions($start, $end, $offset, 'revs.');
334
335
        // In this case there is a slight performance improvement we can make if we're not given a start date.
336
        if ('' === $revDateConditions) {
337
            $revDateConditions = 'AND revs.rev_timestamp > 0';
338
        }
339
340
        // Get the combined regex and tags for the tools
341
        [$regex, $tagIds] = $this->getToolRegexAndTags($project, false, $tool);
342
343
        $pageTable = $project->getTableName('page');
344
        $revisionTable = $project->getTableName('revision');
345
        $ipcTable = $project->getTableName('ip_changes');
346
        $commentTable = $project->getTableName('comment', 'revision');
347
        $tagTable = $project->getTableName('change_tag');
348
        $condNamespace = 'all' === $namespace ? '' : 'AND page_namespace = :namespace';
349
        $tagJoin = '';
350
        $condsTool = [];
351
352
        if ('' != $regex) {
353
            $condsTool[] = 'comment_text RLIKE :tools';
354
        }
355
356
        if ('' != $tagIds) {
357
            $tagJoin = "LEFT OUTER JOIN $tagTable ON (ct_rev_id = revs.rev_id)";
358
            $condsTool[] = "ct_tag_id IN ($tagIds)";
359
        }
360
361
        // IP range handling.
362
        $ipcJoin = '';
363
        $whereClause = 'revs.rev_actor = :actorId';
364
        $params = ['tools' => $regex];
365
        if ($user->isIpRange()) {
366
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
367
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
368
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
369
        }
370
371
        $sql = "SELECT
372
                    page_title,
373
                    page_namespace AS `namespace`,
374
                    revs.rev_id AS `rev_id`,
375
                    revs.rev_timestamp AS `timestamp`,
376
                    revs.rev_minor_edit AS `minor`,
377
                    revs.rev_len AS `length`,
378
                    (CAST(revs.rev_len AS SIGNED) - IFNULL(parentrevs.rev_len, 0)) AS `length_change`,
379
                    comment_text AS `comment`
380
                FROM $pageTable
381
                JOIN $revisionTable AS revs ON (page_id = revs.rev_page)
382
                $ipcJoin
383
                LEFT JOIN $revisionTable AS parentrevs ON (revs.rev_parent_id = parentrevs.rev_id)
384
                LEFT OUTER JOIN $commentTable ON (revs.rev_comment_id = comment_id)
385
                $tagJoin
386
                WHERE $whereClause
387
                $revDateConditions
388
                $condNamespace
389
                AND (".implode(' OR ', $condsTool).")
390
                GROUP BY revs.rev_id
391
                ORDER BY revs.rev_timestamp DESC
392
                LIMIT $limit";
393
394
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
395
        $result = $resultQuery->fetchAllAssociative();
396
397
        // Cache and return.
398
        return $this->setCache($cacheKey, $result);
399
    }
400
401
    /**
402
     * Get counts of known automated tools used by the given user.
403
     * @param Project $project
404
     * @param User $user
405
     * @param string|int $namespace Namespace ID or 'all'.
406
     * @param int|false $start Start date as Unix timestamp.
407
     * @param int|false $end End date as Unix timestamp.
408
     * @return string[] Each tool that they used along with the count and link:
409
     *                  [
410
     *                      'Twinkle' => [
411
     *                          'count' => 50,
412
     *                          'link' => 'Wikipedia:Twinkle',
413
     *                      ],
414
     *                  ]
415
     */
416
    public function getToolCounts(Project $project, User $user, $namespace = 'all', $start = false, $end = false): array
417
    {
418
        $cacheKey = $this->getCacheKey(func_get_args(), 'user_autotoolcounts');
419
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
420
            return $this->cache->getItem($cacheKey)->get();
421
        }
422
423
        $sql = $this->getAutomatedCountsSql($project, $user, $namespace, $start, $end);
424
        $params = [];
425
        if ($user->isIpRange()) {
426
            [$params['startIp'], $params['endIp']] = IPUtils::parseRange($user->getUsername());
427
        }
428
        $resultQuery = $this->executeQuery($sql, $project, $user, $namespace, $params);
429
430
        $tools = $this->getTools($project, $namespace);
431
432
        // handling results
433
        $results = [];
434
435
        while ($row = $resultQuery->fetchAssociative()) {
436
            // Only track tools that they've used at least once
437
            $tool = $row['toolname'];
438
            if ($row['count'] > 0) {
439
                $results[$tool] = [
440
                    'link' => $tools[$tool]['link'],
441
                    'label' => $tools[$tool]['label'] ?? $tool,
442
                    'count' => $row['count'],
443
                ];
444
            }
445
        }
446
447
        // Sort the array by count
448
        uasort($results, function ($a, $b) {
449
            return $b['count'] - $a['count'];
450
        });
451
452
        // Cache and return.
453
        return $this->setCache($cacheKey, $results);
454
    }
455
456
    /**
457
     * Get SQL for getting counts of known automated tools used by the user.
458
     * @see self::getAutomatedCounts()
459
     * @param Project $project
460
     * @param User $user
461
     * @param string|int $namespace Namespace ID or 'all'.
462
     * @param int|false $start Start date as Unix timestamp.
463
     * @param int|false $end End date as Unix timestamp.
464
     * @return string The SQL.
465
     */
466
    private function getAutomatedCountsSql(
467
        Project $project,
468
        User $user,
469
        $namespace,
470
        $start = false,
471
        $end = false
472
    ): string {
473
        $revDateConditions = $this->getDateConditions($start, $end);
474
475
        // Load the semi-automated edit types.
476
        $tools = $this->getTools($project, $namespace);
477
478
        // Create a collection of queries that we're going to run.
479
        $queries = [];
480
481
        $revisionTable = $project->getTableName('revision');
482
        $ipcTable = $project->getTableName('ip_changes');
483
        [$pageJoin, $condNamespace] = $this->getPageAndNamespaceSql($project, $namespace);
484
        $conn = $this->getProjectsConnection($project);
485
486
        // IP range handling.
487
        $ipcJoin = '';
488
        $whereClause = 'rev_actor = :actorId';
489
        if ($user->isIpRange()) {
490
            $ipcJoin = "JOIN $ipcTable ON rev_id = ipc_rev_id";
491
            $whereClause = 'ipc_hex BETWEEN :startIp AND :endIp';
492
        }
493
494
        foreach ($tools as $toolName => $values) {
495
            [$condTool, $commentJoin, $tagJoin] = $this->getInnerAutomatedCountsSql($project, $toolName, $values);
496
497
            $toolName = $conn->quote($toolName, PDO::PARAM_STR);
498
499
            // No regex or tag provided for this tool. This can happen for tag-only tools that are in the global
500
            // configuration, but no local tag exists on the said project.
501
            if ('' === $condTool) {
502
                continue;
503
            }
504
505
            $queries[] .= "
506
                SELECT $toolName AS toolname, COUNT(DISTINCT(rev_id)) AS count
507
                FROM $revisionTable
508
                $ipcJoin
509
                $pageJoin
510
                $commentJoin
511
                $tagJoin
512
                WHERE $whereClause
513
                AND $condTool
514
                $condNamespace
515
                $revDateConditions";
516
        }
517
518
        // Combine to one big query.
519
        return implode(' UNION ', $queries);
520
    }
521
522
    /**
523
     * Get some of the inner SQL for self::getAutomatedCountsSql().
524
     * @param Project $project
525
     * @param string $toolName
526
     * @param string[] $values Values as defined in the AutoEdits config.
527
     * @return string[] [Equality clause, JOIN clause]
528
     */
529
    protected function getInnerAutomatedCountsSql(Project $project, string $toolName, array $values): array
530
    {
531
        $conn = $this->getProjectsConnection($project);
532
        $commentJoin = '';
533
        $tagJoin = '';
534
        $condTool = '';
535
536
        if (isset($values['regex'])) {
537
            $commentTable = $project->getTableName('comment', 'revision');
538
            $commentJoin = "LEFT OUTER JOIN $commentTable ON rev_comment_id = comment_id";
539
            $regex = $conn->quote($values['regex'], PDO::PARAM_STR);
540
            $condTool = "comment_text REGEXP $regex";
541
        }
542
        if (isset($values['tags'])) {
543
            $tagIds = $this->getTagIdsFromNames($project, $values['tags']);
0 ignored issues
show
Bug introduced by
$values['tags'] of type string is incompatible with the type array expected by parameter $tagNames of App\Repository\AutoEdits...y::getTagIdsFromNames(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

543
            $tagIds = $this->getTagIdsFromNames($project, /** @scrutinizer ignore-type */ $values['tags']);
Loading history...
544
545
            if ($tagIds) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $tagIds of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
546
                $tagTable = $project->getTableName('change_tag');
547
                $tagJoin = "LEFT OUTER JOIN $tagTable ON ct_rev_id = rev_id";
548
                $tagClause = $this->getTagsExclusionsSql($project, $toolName, $tagIds);
549
550
                // Use tags in addition to the regex clause, if already present.
551
                // Tags are more reliable but may not be present for edits made with
552
                // older versions of the tool, before it started adding tags.
553
                if ('' === $condTool) {
554
                    $condTool = $tagClause;
555
                } else {
556
                    $condTool = "($condTool OR $tagClause)";
557
                }
558
            }
559
        }
560
561
        return [$condTool, $commentJoin, $tagJoin];
562
    }
563
564
    /**
565
     * Get the combined regex and tags for all semi-automated tools, or the given tool, ready to be used in a query.
566
     * @param Project $project
567
     * @param bool $nonAutoEdits Set to true to exclude tools with the 'contribs' flag.
568
     * @param string|null $tool
569
     * @param int|string|null $namespace Tools only used in given namespace ID, or 'all' for all namespaces.
570
     * @return array In the format: ['combined|regex', '1,2,3'] where the second element is a
571
     *   comma-separated list of the tag IDs, ready to be used in SQL.
572
     */
573
    private function getToolRegexAndTags(
574
        Project $project,
575
        bool $nonAutoEdits = false,
576
        ?string $tool = null,
577
        $namespace = null
578
    ): array {
579
        $tools = $this->getTools($project);
580
        $regexes = [];
581
        $tagIds = [];
582
583
        if ('' != $tool) {
584
            $tools = [$tools[$tool]];
585
        }
586
587
        foreach (array_values($tools) as $values) {
588
            if ($nonAutoEdits && isset($values['contribs'])) {
589
                continue;
590
            }
591
592
            if (is_numeric($namespace) &&
593
                !empty($values['namespaces']) &&
594
                !in_array((int)$namespace, $values['namespaces'])
595
            ) {
596
                continue;
597
            }
598
599
            if (isset($values['regex'])) {
600
                $regexes[] = $values['regex'];
601
            }
602
            if (isset($values['tags'])) {
603
                $tagIds = array_merge($tagIds, $this->getTagIdsFromNames($project, $values['tags']));
604
            }
605
        }
606
607
        return [
608
            implode('|', $regexes),
609
            implode(',', $tagIds),
610
        ];
611
    }
612
613
    /**
614
     * Get the IDs of tags for given Project, which are used in the IN clauses of other queries above.
615
     * This join decomposition is actually faster than JOIN'ing on change_tag_def all in one query.
616
     * @param Project $project
617
     * @return int[] Keys are the tag name, values are the IDs.
618
     */
619
    public function getTags(Project $project): array
620
    {
621
        // Use process cache; ensures we don't needlessly re-query for tag IDs
622
        // during the same request when using the ?usesandbox=1 option.
623
        if (isset($this->tags)) {
624
            return $this->tags;
625
        }
626
627
        $cacheKey = $this->getCacheKey(func_get_args(), 'ae_tag_ids');
628
        if (!$this->useSandbox && $this->cache->hasItem($cacheKey)) {
629
            return $this->cache->getItem($cacheKey)->get();
630
        }
631
632
        $conn = $this->getProjectsConnection($project);
633
634
        // Get all tag values.
635
        $tags = [];
636
        foreach (array_values($this->getTools($project)) as $values) {
637
            if (isset($values['tags'])) {
638
                $tags = array_merge(
639
                    $tags,
640
                    array_map(function ($tag) use ($conn) {
641
                        return $conn->quote($tag, PDO::PARAM_STR);
642
                    }, $values['tags'])
643
                );
644
            }
645
        }
646
647
        $tags = implode(',', $tags);
648
        $tagDefTable = $project->getTableName('change_tag_def');
649
        $sql = "SELECT ctd_name, ctd_id FROM $tagDefTable
650
                WHERE ctd_name IN ($tags)";
651
        $this->tags = $this->executeProjectsQuery($project, $sql)->fetchAllKeyValue();
652
653
        // Cache and return.
654
        return $this->setCache($cacheKey, $this->tags);
655
    }
656
657
    /**
658
     * Generate the WHERE clause to query for the given tags, filtering out exclusions ('tag_excludes' option).
659
     * For instance, Huggle edits are also tagged as Rollback, but when viewing
660
     * Rollback edits we don't want to show Huggle edits.
661
     * @param Project $project
662
     * @param string $tool
663
     * @param array $tagIds
664
     * @return string
665
     */
666
    private function getTagsExclusionsSql(Project $project, string $tool, array $tagIds): string
667
    {
668
        $tagsList = implode(',', $tagIds);
669
        $tagExcludes = $this->getTools($project)[$tool]['tag_excludes'] ?? [];
670
        $excludesSql = '';
671
672
        if ($tagExcludes && 1 === count($tagIds)) {
673
            // Get tag IDs, filtering out those for which no ID exists (meaning there is no local tag for that tool).
674
            $excludesList = implode(',', array_filter(array_map(function ($tagName) use ($project) {
675
                return $this->getTags($project)[$tagName] ?? null;
676
            }, $tagExcludes)));
677
678
            if (strlen($excludesList)) {
679
                $excludesSql = "AND ct_tag_id NOT IN ($excludesList)";
680
            }
681
        }
682
683
        return "ct_tag_id IN ($tagsList) $excludesSql";
684
    }
685
686
    /**
687
     * Get IDs for tags given the names.
688
     * @param Project $project
689
     * @param array $tagNames
690
     * @return array
691
     */
692
    private function getTagIdsFromNames(Project $project, array $tagNames): array
693
    {
694
        $allTagIds = $this->getTags($project);
695
        $tagIds = [];
696
697
        foreach ($tagNames as $tag) {
698
            if (isset($allTagIds[$tag])) {
699
                $tagIds[] = $allTagIds[$tag];
700
            }
701
        }
702
703
        return $tagIds;
704
    }
705
}
706