Passed
Pull Request — main (#442)
by MusikAnimal
08:00 queued 04:10
created

Repository::setContainer()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nc 1
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Model\Project;
8
use DateInterval;
9
use Doctrine\DBAL\Connection;
10
use Doctrine\DBAL\DBALException;
11
use Doctrine\DBAL\Driver\ResultStatement;
12
use Doctrine\DBAL\Exception\DriverException;
13
use Doctrine\DBAL\Query\QueryBuilder;
14
use GuzzleHttp\Client;
15
use Psr\Cache\CacheItemPoolInterface;
16
use Psr\Container\ContainerInterface;
17
use Psr\Log\LoggerInterface;
18
use Symfony\Component\HttpFoundation\Response;
19
use Symfony\Component\HttpKernel\Exception\HttpException;
20
use Symfony\Component\HttpKernel\Exception\ServiceUnavailableHttpException;
21
22
/**
23
 * A repository is responsible for retrieving data from wherever it lives (databases, APIs, filesystems, etc.)
24
 */
25
abstract class Repository
26
{
27
    protected CacheItemPoolInterface $cache;
28
    protected ContainerInterface $container;
29
    protected Client $guzzle;
30
    protected LoggerInterface $logger;
31
32
    /** @var Connection The database connection to the meta database. */
33
    private $metaConnection;
34
35
    /** @var Connection The database connection to other tools' databases.  */
36
    private $toolsConnection;
37
38
    /** @var bool Whether this is configured as a WMF installation. */
39
    protected $isWMF;
40
41
    /** @var int */
42
    protected $queryTimeout;
43
44
    /** @var string Prefix URL for where the dblists live. Will be followed by i.e. 's1.dblist' */
45
    public const DBLISTS_URL = 'https://noc.wikimedia.org/conf/dblists/';
46
47
    /**
48
     * Create a new Repository.
49
     * @param ContainerInterface $container
50
     * @param CacheItemPoolInterface $cache
51
     * @param Client $guzzle
52
     */
53
    public function __construct(
54
        ContainerInterface $container,
55
        CacheItemPoolInterface $cache,
56
        Client $guzzle,
57
        LoggerInterface $logger,
58
        bool $isWMF,
59
        int $queryTimeout
60
    ) {
61
        $this->container = $container;
62
        $this->cache = $cache;
63
        $this->guzzle = $guzzle;
64
        $this->logger = $logger;
65
        $this->isWMF = $isWMF;
66
        $this->queryTimeout = $queryTimeout;
67
    }
68
69
    /***************
70
     * CONNECTIONS *
71
     ***************/
72
73
    /**
74
     * Get the database connection for the 'meta' database.
75
     * @return Connection
76
     * @codeCoverageIgnore
77
     */
78
    protected function getMetaConnection(): Connection
79
    {
80
        if (!$this->metaConnection instanceof Connection) {
0 ignored issues
show
introduced by
$this->metaConnection is always a sub-type of Doctrine\DBAL\Connection.
Loading history...
81
            $this->metaConnection = $this->getProjectsConnection('meta');
82
        }
83
        return $this->metaConnection;
84
    }
85
86
    /**
87
     * Get a database connection for the given database.
88
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
89
     * @return Connection
90
     * @codeCoverageIgnore
91
     */
92
    protected function getProjectsConnection($project): Connection
93
    {
94
        if (is_string($project)) {
95
            if (1 === preg_match('/^s\d+$/', $project)) {
96
                $slice = $project;
97
            } else {
98
                // Assume database name. Remove _p if given.
99
                $db = str_replace('_p', '', $project);
100
                $slice = $this->getDbList()[$db];
101
            }
102
        } else {
103
            $slice = $this->getDbList()[$project->getDatabaseName()];
104
        }
105
106
        return $this->container->get('doctrine')
107
            ->getConnection('toolforge_'.$slice);
108
    }
109
110
    /**
111
     * Get the database connection for the 'tools' database (the one that other tools store data in).
112
     * @return Connection
113
     * @codeCoverageIgnore
114
     */
115
    protected function getToolsConnection(): Connection
116
    {
117
        if (!$this->toolsConnection instanceof Connection) {
0 ignored issues
show
introduced by
$this->toolsConnection is always a sub-type of Doctrine\DBAL\Connection.
Loading history...
118
            $this->toolsConnection = $this->container
119
                ->get('doctrine')
120
                ->getManager('toolsdb')
121
                ->getConnection();
122
        }
123
        return $this->toolsConnection;
124
    }
125
126
    /**
127
     * Fetch and concatenate all the dblists into one array.
128
     * Based on ToolforgeBundle https://github.com/wikimedia/ToolforgeBundle/blob/master/Service/ReplicasClient.php
129
     * License: GPL 3.0 or later
130
     * @return string[] Keys are database names (i.e. 'enwiki'), values are the slices (i.e. 's1').
131
     */
132
    protected function getDbList(): array
133
    {
134
        $cacheKey = 'dblists';
135
        if ($this->cache->hasItem($cacheKey)) {
136
            return $this->cache->getItem($cacheKey)->get();
137
        }
138
139
        $dbList = [];
140
        $exists = true;
141
        $i = 0;
142
143
        while ($exists) {
144
            $i += 1;
145
            $response = $this->guzzle->request('GET', self::DBLISTS_URL."s$i.dblist", ['http_errors' => false]);
146
            $exists = in_array(
147
                $response->getStatusCode(),
148
                [Response::HTTP_OK, Response::HTTP_NOT_MODIFIED]
149
            ) && $i < 50; // Safeguard
150
151
            if (!$exists) {
152
                break;
153
            }
154
155
            $lines = explode("\n", $response->getBody()->getContents());
156
            foreach ($lines as $line) {
157
                $line = trim($line);
158
                if (1 !== preg_match('/^#/', $line) && '' !== $line) {
159
                    // Skip comments and blank lines.
160
                    $dbList[$line] = "s$i";
161
                }
162
            }
163
        }
164
165
        // Manually add the meta and centralauth databases.
166
        $dbList['meta'] = 's7';
167
        $dbList['centralauth'] = 's7';
168
169
        // Cache for one week.
170
        return $this->setCache($cacheKey, $dbList, 'P1W');
171
    }
172
173
    /*****************
174
     * QUERY HELPERS *
175
     *****************/
176
177
    /**
178
     * Make a request to the MediaWiki API.
179
     * @param Project $project
180
     * @param array $params
181
     * @return array
182
     */
183
    public function executeApiRequest(Project $project, array $params): array
184
    {
185
        return json_decode($this->guzzle->request('GET', $project->getApiUrl(), [
186
            'query' => array_merge([
187
                'action' => 'query',
188
                'format' => 'json',
189
            ], $params),
190
        ])->getBody()->getContents(), true);
191
    }
192
193
    /**
194
     * Normalize and quote a table name for use in SQL.
195
     * @param string $databaseName
196
     * @param string $tableName
197
     * @param string|null $tableExtension Optional table extension, which will only get used if we're on labs.
198
     *   If null, table extensions are added as defined in table_map.yml. If a blank string, no extension is added.
199
     * @return string Fully-qualified and quoted table name.
200
     */
201
    public function getTableName(string $databaseName, string $tableName, ?string $tableExtension = null): string
202
    {
203
        $mapped = false;
204
205
        // This is a workaround for a one-to-many mapping
206
        // as required by Labs. We combine $tableName with
207
        // $tableExtension in order to generate the new table name
208
        if ($this->isWMF && null !== $tableExtension) {
209
            $mapped = true;
210
            $tableName .=('' === $tableExtension ? '' : '_'.$tableExtension);
211
        } elseif ($this->container->hasParameter("app.table.$tableName")) {
0 ignored issues
show
Bug introduced by
The method hasParameter() does not exist on Psr\Container\ContainerInterface. It seems like you code against a sub-type of Psr\Container\ContainerInterface such as Symfony\Component\Depend...tion\ContainerInterface. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-call  annotation

211
        } elseif ($this->container->/** @scrutinizer ignore-call */ hasParameter("app.table.$tableName")) {
Loading history...
212
            // Use the table specified in the table mapping configuration, if present.
213
            $mapped = true;
214
            $tableName = $this->container->getParameter("app.table.$tableName");
0 ignored issues
show
Bug introduced by
The method getParameter() does not exist on Psr\Container\ContainerInterface. It seems like you code against a sub-type of Psr\Container\ContainerInterface such as Symfony\Component\Depend...tion\ContainerInterface. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-call  annotation

214
            /** @scrutinizer ignore-call */ 
215
            $tableName = $this->container->getParameter("app.table.$tableName");
Loading history...
215
        }
216
217
        // For 'revision' and 'logging' tables (actually views) on Labs, use the indexed versions
218
        // (that have some rows hidden, e.g. for revdeleted users).
219
        // This is a safeguard in case table mapping isn't properly set up.
220
        $isLoggingOrRevision = in_array($tableName, ['revision', 'logging', 'archive']);
221
        if (!$mapped && $isLoggingOrRevision && $this->isWMF) {
222
            $tableName .="_userindex";
223
        }
224
225
        // Figure out database name.
226
        // Use class variable for the database name if not set via function parameter.
227
        if ($this->isWMF && '_p' != substr($databaseName, -2)) {
228
            // Append '_p' if this is labs.
229
            $databaseName .= '_p';
230
        }
231
232
        return "`$databaseName`.`$tableName`";
233
    }
234
235
    /**
236
     * Get a unique cache key for the given list of arguments. Assuming each argument of
237
     * your function should be accounted for, you can pass in them all with func_get_args:
238
     *   $this->getCacheKey(func_get_args(), 'unique key for function');
239
     * Arguments that are a model should implement their own getCacheKey() that returns
240
     * a unique identifier for an instance of that model. See User::getCacheKey() for example.
241
     * @param array|mixed $args Array of arguments or a single argument.
242
     * @param string|null $key Unique key for this function. If omitted the function name itself
243
     *   is used, which is determined using `debug_backtrace`.
244
     * @return string
245
     */
246
    public function getCacheKey($args, ?string $key = null): string
247
    {
248
        if (null === $key) {
249
            $key = debug_backtrace()[1]['function'];
250
        }
251
252
        if (!is_array($args)) {
253
            $args = [$args];
254
        }
255
256
        // Start with base key.
257
        $cacheKey = $key;
258
259
        // Loop through and determine what values to use based on type of object.
260
        foreach ($args as $arg) {
261
            // Zero is an acceptable value.
262
            if ('' === $arg || null === $arg) {
263
                continue;
264
            }
265
266
            $cacheKey .= $this->getCacheKeyFromArg($arg);
267
        }
268
269
        // Remove reserved characters.
270
        return preg_replace('/[{}()\/\@\:"]/', '', $cacheKey);
271
    }
272
273
    /**
274
     * Get a cache-friendly string given an argument.
275
     * @param mixed $arg
276
     * @return string
277
     */
278
    private function getCacheKeyFromArg($arg): string
279
    {
280
        if (is_object($arg) && method_exists($arg, 'getCacheKey')) {
281
            return '.'.$arg->getCacheKey();
282
        } elseif (is_array($arg)) {
283
            // Assumed to be an array of objects that can be parsed into a string.
284
            return '.'.md5(implode('', $arg));
285
        } else {
286
            // Assumed to be a string, number or boolean.
287
            return '.'.md5((string)$arg);
288
        }
289
    }
290
291
    /**
292
     * Set the cache with given options.
293
     * @param string $cacheKey
294
     * @param mixed $value
295
     * @param string $duration Valid DateInterval string.
296
     * @return mixed The given $value.
297
     */
298
    public function setCache(string $cacheKey, $value, string $duration = 'PT20M')
299
    {
300
        $cacheItem = $this->cache
301
            ->getItem($cacheKey)
302
            ->set($value)
303
            ->expiresAfter(new DateInterval($duration));
304
        $this->cache->save($cacheItem);
305
        return $value;
306
    }
307
308
    /********************************
309
     * DATABASE INTERACTION HELPERS *
310
     ********************************/
311
312
    /**
313
     * Creates WHERE conditions with date range to be put in query.
314
     * @param false|int $start Unix timestamp.
315
     * @param false|int $end Unix timestamp.
316
     * @param false|int $offset Unix timestamp. Used for pagination, will end up replacing $end.
317
     * @param string $tableAlias Alias of table FOLLOWED BY DOT.
318
     * @param string $field
319
     * @return string
320
     */
321
    public function getDateConditions(
322
        $start,
323
        $end,
324
        $offset = false,
325
        string $tableAlias = '',
326
        string $field = 'rev_timestamp'
327
    ) : string {
328
        $datesConditions = '';
329
330
        if (is_int($start)) {
331
            // Convert to YYYYMMDDHHMMSS.
332
            $start = date('Ymd', $start).'000000';
333
            $datesConditions .= " AND {$tableAlias}{$field} >= '$start'";
334
        }
335
336
        // When we're given an $offset, it basically replaces $end, except it's also a full timestamp.
337
        if (is_int($offset)) {
338
            $offset = date('YmdHis', $offset);
339
            $datesConditions .= " AND {$tableAlias}{$field} <= '$offset'";
340
        } elseif (is_int($end)) {
341
            $end = date('Ymd', $end) . '235959';
342
            $datesConditions .= " AND {$tableAlias}{$field} <= '$end'";
343
        }
344
345
        return $datesConditions;
346
    }
347
348
    /**
349
     * Execute a query using the projects connection, handling certain Exceptions.
350
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
351
     * @param string $sql
352
     * @param array $params Parameters to bound to the prepared query.
353
     * @param int|null $timeout Maximum statement time in seconds. null will use the
354
     *   default specified by the app.query_timeout config parameter.
355
     * @return ResultStatement
356
     * @throws DriverException
357
     * @throws DBALException
358
     * @codeCoverageIgnore
359
     */
360
    public function executeProjectsQuery(
361
        $project,
362
        string $sql,
363
        array $params = [],
364
        ?int $timeout = null
365
    ): ResultStatement {
366
        try {
367
            $timeout = $timeout ?? $this->queryTimeout;
368
            $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$sql;
369
370
            return $this->getProjectsConnection($project)->executeQuery($sql, $params);
0 ignored issues
show
Security SQL Injection introduced by
$params can contain request data and is used in sql context(s) leading to a potential security vulnerability.

3 paths for user data to reach this point

  1. Path: LargestPages::__construct() is called in src/Controller/LargestPagesController.php on line 68
  1. LargestPages::__construct() is called
    in src/Controller/LargestPagesController.php on line 68
  2. Enters via parameter $excludePattern
    in src/Model/LargestPages.php on line 35
  3. $excludePattern is assigned to property LargestPages::$excludePattern
    in src/Model/LargestPages.php on line 41
  4. Read from property LargestPages::$excludePattern, and LargestPagesRepository::getData() is called
    in src/Model/LargestPages.php on line 72
  5. Enters via parameter $excludePattern
    in src/Repository/LargestPagesRepository.php on line 76
  6. Repository::executeProjectsQuery() is called
    in src/Repository/LargestPagesRepository.php on line 100
  7. Enters via parameter $params
    in src/Repository/Repository.php on line 363
  2. Path: $this->request->cookies->get($name) is assigned to property XtoolsController::$cookies in src/Controller/XtoolsController.php on line 306
  1. $this->request->cookies->get($name) is assigned to property XtoolsController::$cookies
    in src/Controller/XtoolsController.php on line 306
  2. Read from property XtoolsController::$cookies, and XtoolsController::validateProject() is called
    in src/Controller/XtoolsController.php on line 372
  3. Enters via parameter $projectQuery
    in src/Controller/XtoolsController.php on line 459
  4. ProjectRepository::getProject() is called
    in src/Controller/XtoolsController.php on line 461
  5. Enters via parameter $projectIdent
    in src/Repository/ProjectRepository.php on line 54
  6. Project::__construct() is called
    in src/Repository/ProjectRepository.php on line 56
  7. Enters via parameter $nameOrUrl
    in src/Model/Project.php on line 33
  8. $nameOrUrl is assigned to property Project::$nameUnnormalized
    in src/Model/Project.php on line 35
  9. Read from property Project::$nameUnnormalized, and ProjectRepository::getOne() is called
    in src/Model/Project.php on line 90
  10. Enters via parameter $project
    in src/Repository/ProjectRepository.php on line 161
  11. Data is passed through rtrim(), and rtrim($project, '_p') is assigned to $project
    in src/Repository/ProjectRepository.php on line 170
  12. Repository::executeProjectsQuery() is called
    in src/Repository/ProjectRepository.php on line 202
  13. Enters via parameter $params
    in src/Repository/Repository.php on line 363
  3. Path: LargestPages::__construct() is called in src/Controller/LargestPagesController.php on line 67
  1. LargestPages::__construct() is called
    in src/Controller/LargestPagesController.php on line 67
  2. Enters via parameter $includePattern
    in src/Model/LargestPages.php on line 34
  3. $includePattern is assigned to property LargestPages::$includePattern
    in src/Model/LargestPages.php on line 40
  4. Read from property LargestPages::$includePattern, and LargestPagesRepository::getData() is called
    in src/Model/LargestPages.php on line 71
  5. Enters via parameter $includePattern
    in src/Repository/LargestPagesRepository.php on line 76
  6. Repository::executeProjectsQuery() is called
    in src/Repository/LargestPagesRepository.php on line 100
  7. Enters via parameter $params
    in src/Repository/Repository.php on line 363

Used in sql context

  1. Connection::executeQuery() is called
    in src/Repository/Repository.php on line 370
  2. Enters via parameter $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1284
  3. Data is passed through expandListParameters()
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1299
  4. Doctrine\DBAL\SQLParserUtils::expandListParameters($sql, $params, $types) is assigned to $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1299
  5. DB2Statement::execute() is called
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1306
  6. Enters via parameter $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Driver/IBMDB2/DB2Statement.php on line 216
  7. db2_execute() is called
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Driver/IBMDB2/DB2Statement.php on line 238

Preventing SQL Injection

There are two options to prevent SQL injection. Generally, it is recommended to use parameter binding:

$stmt = mysqli_prepare("SELECT * FROM users WHERE name = ?");
$stmt->bind_param("s", $taintedUserName);

An alternative – although generally not recommended – is to escape your data manually:

$mysqli = new mysqli('localhost', 'user', 'pass', 'dbname');

$escaped = $mysqli->real_escape_string($taintedUserName);
$mysqli->query("SELECT * FROM users WHERE name = '".$escaped."'");

General Strategies to prevent injection

In general, it is advisable to prevent any user-data to reach this point. This can be done by white-listing certain values:

if ( ! in_array($value, array('this-is-allowed', 'and-this-too'), true)) {
    throw new \InvalidArgumentException('This input is not allowed.');
}

For numeric data, we recommend to explicitly cast the data:

$sanitized = (integer) $tainted;
Loading history...
371
        } catch (DriverException $e) {
372
            $this->handleDriverError($e, $timeout);
373
        }
374
    }
375
376
    /**
377
     * Execute a query using the projects connection, handling certain Exceptions.
378
     * @param QueryBuilder $qb
379
     * @param int|null $timeout Maximum statement time in seconds. null will use the
380
     *   default specified by the app.query_timeout config parameter.
381
     * @return ResultStatement
382
     * @throws HttpException
383
     * @throws DriverException
384
     * @codeCoverageIgnore
385
     */
386
    public function executeQueryBuilder(QueryBuilder $qb, ?int $timeout = null): ResultStatement
387
    {
388
        try {
389
            $timeout = $timeout ?? $this->container->getParameter('app.query_timeout');
390
            $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$qb->getSQL();
391
            return $qb->getConnection()->executeQuery($sql, $qb->getParameters(), $qb->getParameterTypes());
392
        } catch (DriverException $e) {
393
            $this->handleDriverError($e, $timeout);
394
        }
395
    }
396
397
    /**
398
     * Special handling of some DriverExceptions, otherwise original Exception is thrown.
399
     * @param DriverException $e
400
     * @param int|null $timeout Timeout value, if applicable. This is passed to the i18n message.
401
     * @throws HttpException
402
     * @throws DriverException
403
     * @codeCoverageIgnore
404
     */
405
    private function handleDriverError(DriverException $e, ?int $timeout): void
406
    {
407
        // If no value was passed for the $timeout, it must be the default.
408
        if (null === $timeout) {
409
            $timeout = $this->queryTimeout;
410
        }
411
412
        if (1226 === $e->getErrorCode()) {
413
            throw new ServiceUnavailableHttpException(30, 'error-service-overload', null, 503);
414
        } elseif (in_array($e->getErrorCode(), [1969, 2006, 2013])) {
415
            // FIXME: Attempt to reestablish connection on 2006 error (MySQL server has gone away).
416
            throw new HttpException(504, 'error-query-timeout', null, [], $timeout);
417
        } else {
418
            throw $e;
419
        }
420
    }
421
}
422