Test Setup Failed
Pull Request — main (#426)
by MusikAnimal
17:10 queued 11:44
created

Repository::getTableName()   B

Complexity

Conditions 10
Paths 16

Size

Total Lines 32
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 10
eloc 13
nc 16
nop 3
dl 0
loc 32
rs 7.6666
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * This file contains only the Repository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace App\Repository;
9
10
use App\Model\Project;
11
use DateInterval;
12
use Doctrine\DBAL\Connection;
13
use Doctrine\DBAL\Driver\ResultStatement;
14
use Doctrine\DBAL\Exception\DriverException;
15
use Doctrine\DBAL\Query\QueryBuilder;
16
use GuzzleHttp\Client;
17
use Psr\Cache\CacheItemPoolInterface;
18
use Psr\Log\LoggerInterface;
19
use Psr\Log\NullLogger;
20
use Symfony\Component\DependencyInjection\ContainerInterface;
21
use Symfony\Component\HttpFoundation\Response;
22
use Symfony\Component\HttpKernel\Exception\HttpException;
23
use Symfony\Component\HttpKernel\Exception\ServiceUnavailableHttpException;
24
25
/**
26
 * A repository is responsible for retrieving data from wherever it lives (databases, APIs, filesystems, etc.)
27
 */
28
abstract class Repository
29
{
30
    /** @var ContainerInterface The application's DI container. */
31
    protected $container;
32
33
    /** @var Connection The database connection to the meta database. */
34
    private $metaConnection;
35
36
    /** @var Connection The database connection to other tools' databases.  */
37
    private $toolsConnection;
38
39
    /** @var CacheItemPoolInterface The cache. */
40
    protected $cache;
41
42
    /** @var LoggerInterface The logger. */
43
    protected $log;
44
45
    /** @var string Prefix URL for where the dblists live. Will be followed by i.e. 's1.dblist' */
46
    public const DBLISTS_URL = 'https://noc.wikimedia.org/conf/dblists/';
47
48
    /**
49
     * Create a new Repository with nothing but a null-logger.
50
     */
51
    public function __construct()
52
    {
53
        $this->log = new NullLogger();
54
    }
55
56
    /**
57
     * Set the DI container.
58
     * @param ContainerInterface $container
59
     */
60
    public function setContainer(ContainerInterface $container): void
61
    {
62
        $this->container = $container;
63
        $this->cache = $container->get('cache.app');
64
        $this->log = $container->get('logger');
65
    }
66
67
    /**
68
     * Is XTools connecting to WMF Labs?
69
     * @return bool
70
     * @codeCoverageIgnore
71
     */
72
    public function isLabs(): bool
73
    {
74
        return (bool)$this->container->getParameter('app.is_labs');
75
    }
76
77
    /***************
78
     * CONNECTIONS *
79
     ***************/
80
81
    /**
82
     * Get the database connection for the 'meta' database.
83
     * @return Connection
84
     * @codeCoverageIgnore
85
     */
86
    protected function getMetaConnection(): Connection
87
    {
88
        if (!$this->metaConnection instanceof Connection) {
0 ignored issues
show
introduced by
$this->metaConnection is always a sub-type of Doctrine\DBAL\Connection.
Loading history...
89
            $this->metaConnection = $this->getProjectsConnection('meta');
90
        }
91
        return $this->metaConnection;
92
    }
93
94
    /**
95
     * Get a database connection for the given database.
96
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
97
     * @return Connection
98
     * @codeCoverageIgnore
99
     */
100
    protected function getProjectsConnection($project): Connection
101
    {
102
        if (is_string($project)) {
103
            if (1 === preg_match('/^s\d+$/', $project)) {
104
                $slice = $project;
105
            } else {
106
                // Assume database name. Remove _p if given.
107
                $db = str_replace('_p', '', $project);
108
                $slice = $this->getDbList()[$db];
109
            }
110
        } else {
111
            $slice = $this->getDbList()[$project->getDatabaseName()];
112
        }
113
114
        return $this->container->get('doctrine')
115
            ->getConnection('toolforge_'.$slice);
116
    }
117
118
    /**
119
     * Get the database connection for the 'tools' database (the one that other tools store data in).
120
     * @return Connection
121
     * @codeCoverageIgnore
122
     */
123
    protected function getToolsConnection(): Connection
124
    {
125
        if (!$this->toolsConnection instanceof Connection) {
0 ignored issues
show
introduced by
$this->toolsConnection is always a sub-type of Doctrine\DBAL\Connection.
Loading history...
126
            $this->toolsConnection = $this->container
127
                ->get('doctrine')
128
                ->getManager('toolsdb')
129
                ->getConnection();
130
        }
131
        return $this->toolsConnection;
132
    }
133
134
    /**
135
     * Fetch and concatenate all the dblists into one array.
136
     * Based on ToolforgeBundle https://github.com/wikimedia/ToolforgeBundle/blob/master/Service/ReplicasClient.php
137
     * License: GPL 3.0 or later
138
     * @return string[] Keys are database names (i.e. 'enwiki'), values are the slices (i.e. 's1').
139
     */
140
    protected function getDbList(): array
141
    {
142
        $cacheKey = 'dblists';
143
        if ($this->cache->hasItem($cacheKey)) {
144
            return $this->cache->getItem($cacheKey)->get();
145
        }
146
147
        /** @var Client $client */
148
        $client = $this->container->get('eight_points_guzzle.client.xtools');
149
150
        $dbList = [];
151
        $exists = true;
152
        $i = 0;
153
154
        while ($exists) {
155
            $i += 1;
156
            $response = $client->request('GET', self::DBLISTS_URL."s$i.dblist", ['http_errors' => false]);
157
            $exists = in_array(
158
                $response->getStatusCode(),
159
                [Response::HTTP_OK, Response::HTTP_NOT_MODIFIED]
160
            ) && $i < 50; // Safeguard
161
162
            if (!$exists) {
163
                break;
164
            }
165
166
            $lines = explode("\n", $response->getBody()->getContents());
167
            foreach ($lines as $line) {
168
                $line = trim($line);
169
                if (1 !== preg_match('/^#/', $line) && '' !== $line) {
170
                    // Skip comments and blank lines.
171
                    $dbList[$line] = "s$i";
172
                }
173
            }
174
        }
175
176
        // Manually add the meta and centralauth databases.
177
        $dbList['meta'] = 's7';
178
        $dbList['centralauth'] = 's7';
179
180
        // Cache for one week.
181
        return $this->setCache($cacheKey, $dbList, 'P1W');
182
    }
183
184
    /*****************
185
     * QUERY HELPERS *
186
     *****************/
187
188
    /**
189
     * Make a request to the MediaWiki API.
190
     * @param Project $project
191
     * @param array $params
192
     * @return array
193
     */
194
    public function executeApiRequest(Project $project, array $params): array
195
    {
196
        /** @var Client $client */
197
        $client = $this->container->get('eight_points_guzzle.client.xtools');
198
199
        return json_decode($client->request('GET', $project->getApiUrl(), [
200
            'query' => array_merge([
201
                'action' => 'query',
202
                'format' => 'json',
203
            ], $params),
204
        ])->getBody()->getContents(), true);
205
    }
206
207
    /**
208
     * Normalize and quote a table name for use in SQL.
209
     * @param string $databaseName
210
     * @param string $tableName
211
     * @param string|null $tableExtension Optional table extension, which will only get used if we're on labs.
212
     *   If null, table extensions are added as defined in table_map.yml. If a blank string, no extension is added.
213
     * @return string Fully-qualified and quoted table name.
214
     */
215
    public function getTableName(string $databaseName, string $tableName, ?string $tableExtension = null): string
216
    {
217
        $mapped = false;
218
219
        // This is a workaround for a one-to-many mapping
220
        // as required by Labs. We combine $tableName with
221
        // $tableExtension in order to generate the new table name
222
        if ($this->isLabs() && null !== $tableExtension) {
223
            $mapped = true;
224
            $tableName .=('' === $tableExtension ? '' : '_'.$tableExtension);
225
        } elseif ($this->container->hasParameter("app.table.$tableName")) {
226
            // Use the table specified in the table mapping configuration, if present.
227
            $mapped = true;
228
            $tableName = $this->container->getParameter("app.table.$tableName");
229
        }
230
231
        // For 'revision' and 'logging' tables (actually views) on Labs, use the indexed versions
232
        // (that have some rows hidden, e.g. for revdeleted users).
233
        // This is a safeguard in case table mapping isn't properly set up.
234
        $isLoggingOrRevision = in_array($tableName, ['revision', 'logging', 'archive']);
235
        if (!$mapped && $isLoggingOrRevision && $this->isLabs()) {
236
            $tableName .="_userindex";
237
        }
238
239
        // Figure out database name.
240
        // Use class variable for the database name if not set via function parameter.
241
        if ($this->isLabs() && '_p' != substr($databaseName, -2)) {
242
            // Append '_p' if this is labs.
243
            $databaseName .= '_p';
244
        }
245
246
        return "`$databaseName`.`$tableName`";
247
    }
248
249
    /**
250
     * Get a unique cache key for the given list of arguments. Assuming each argument of
251
     * your function should be accounted for, you can pass in them all with func_get_args:
252
     *   $this->getCacheKey(func_get_args(), 'unique key for function');
253
     * Arguments that are a model should implement their own getCacheKey() that returns
254
     * a unique identifier for an instance of that model. See User::getCacheKey() for example.
255
     * @param array|mixed $args Array of arguments or a single argument.
256
     * @param string $key Unique key for this function. If omitted the function name itself
257
     *   is used, which is determined using `debug_backtrace`.
258
     * @return string
259
     */
260
    public function getCacheKey($args, $key = null): string
261
    {
262
        if (null === $key) {
263
            $key = debug_backtrace()[1]['function'];
264
        }
265
266
        if (!is_array($args)) {
267
            $args = [$args];
268
        }
269
270
        // Start with base key.
271
        $cacheKey = $key;
272
273
        // Loop through and determine what values to use based on type of object.
274
        foreach ($args as $arg) {
275
            // Zero is an acceptable value.
276
            if ('' === $arg || null === $arg) {
277
                continue;
278
            }
279
280
            $cacheKey .= $this->getCacheKeyFromArg($arg);
281
        }
282
283
        // Remove reserved characters.
284
        return preg_replace('/[{}()\/\@\:"]/', '', $cacheKey);
285
    }
286
287
    /**
288
     * Get a cache-friendly string given an argument.
289
     * @param mixed $arg
290
     * @return string
291
     */
292
    private function getCacheKeyFromArg($arg): string
293
    {
294
        if (is_object($arg) && method_exists($arg, 'getCacheKey')) {
295
            return '.'.$arg->getCacheKey();
296
        } elseif (is_array($arg)) {
297
            // Assumed to be an array of objects that can be parsed into a string.
298
            return '.'.md5(implode('', $arg));
299
        } else {
300
            // Assumed to be a string, number or boolean.
301
            return '.'.md5((string)$arg);
302
        }
303
    }
304
305
    /**
306
     * Set the cache with given options.
307
     * @param string $cacheKey
308
     * @param mixed $value
309
     * @param string $duration Valid DateInterval string.
310
     * @return mixed The given $value.
311
     */
312
    public function setCache(string $cacheKey, $value, $duration = 'PT20M')
313
    {
314
        $cacheItem = $this->cache
315
            ->getItem($cacheKey)
316
            ->set($value)
317
            ->expiresAfter(new DateInterval($duration));
318
        $this->cache->save($cacheItem);
319
        return $value;
320
    }
321
322
    /********************************
323
     * DATABASE INTERACTION HELPERS *
324
     ********************************/
325
326
    /**
327
     * Creates WHERE conditions with date range to be put in query.
328
     * @param false|int $start Unix timestamp.
329
     * @param false|int $end Unix timestamp.
330
     * @param false|int $offset Unix timestamp. Used for pagination, will end up replacing $end.
331
     * @param string $tableAlias Alias of table FOLLOWED BY DOT.
332
     * @param string $field
333
     * @return string
334
     */
335
    public function getDateConditions(
336
        $start,
337
        $end,
338
        $offset = false,
339
        string $tableAlias = '',
340
        string $field = 'rev_timestamp'
341
    ) : string {
342
        $datesConditions = '';
343
344
        if (is_int($start)) {
345
            // Convert to YYYYMMDDHHMMSS.
346
            $start = date('Ymd', $start).'000000';
347
            $datesConditions .= " AND {$tableAlias}{$field} >= '$start'";
348
        }
349
350
        // When we're given an $offset, it basically replaces $end, except it's also a full timestamp.
351
        if (is_int($offset)) {
352
            $offset = date('YmdHis', $offset);
353
            $datesConditions .= " AND {$tableAlias}{$field} <= '$offset'";
354
        } elseif (is_int($end)) {
355
            $end = date('Ymd', $end) . '235959';
356
            $datesConditions .= " AND {$tableAlias}{$field} <= '$end'";
357
        }
358
359
        return $datesConditions;
360
    }
361
362
    /**
363
     * Execute a query using the projects connection, handling certain Exceptions.
364
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
365
     * @param string $sql
366
     * @param array $params Parameters to bound to the prepared query.
367
     * @param int|null $timeout Maximum statement time in seconds. null will use the
368
     *   default specified by the app.query_timeout config parameter.
369
     * @return ResultStatement
370
     * @throws DriverException
371
     * @throws \Doctrine\DBAL\DBALException
372
     * @codeCoverageIgnore
373
     */
374
    public function executeProjectsQuery(
375
        $project,
376
        string $sql,
377
        array $params = [],
378
        ?int $timeout = null
379
    ): ResultStatement {
380
        try {
381
            $timeout = $timeout ?? $this->container->getParameter('app.query_timeout');
382
            $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$sql;
383
384
            return $this->getProjectsConnection($project)->executeQuery($sql, $params);
0 ignored issues
show
Security SQL Injection introduced by
$params can contain request data and is used in sql context(s) leading to a potential security vulnerability.

1 path for user data to reach this point

  1. $this->request->get('exclude_pattern', '') is assigned to property LargestPagesController::$params
    in src/Controller/LargestPagesController.php on line 41
  2. Read from property LargestPagesController::$params, and LargestPages::__construct() is called
    in src/Controller/LargestPagesController.php on line 81
  3. Enters via parameter $excludePattern
    in src/Model/LargestPages.php on line 32
  4. $excludePattern is assigned to property LargestPages::$excludePattern
    in src/Model/LargestPages.php on line 37
  5. Read from property LargestPages::$excludePattern, and LargestPagesRepository::getData() is called
    in src/Model/LargestPages.php on line 67
  6. Enters via parameter $excludePattern
    in src/Repository/LargestPagesRepository.php on line 47
  7. Repository::executeProjectsQuery() is called
    in src/Repository/LargestPagesRepository.php on line 71
  8. Enters via parameter $params
    in src/Repository/Repository.php on line 377

Used in sql context

  1. Connection::executeQuery() is called
    in src/Repository/Repository.php on line 384
  2. Enters via parameter $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1284
  3. Data is passed through expandListParameters()
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1299
  4. Doctrine\DBAL\SQLParserUtils::expandListParameters($sql, $params, $types) is assigned to $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1299
  5. DB2Statement::execute() is called
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1306
  6. Enters via parameter $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Driver/IBMDB2/DB2Statement.php on line 216
  7. db2_execute() is called
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Driver/IBMDB2/DB2Statement.php on line 238

Preventing SQL Injection

There are two options to prevent SQL injection. Generally, it is recommended to use parameter binding:

$stmt = mysqli_prepare("SELECT * FROM users WHERE name = ?");
$stmt->bind_param("s", $taintedUserName);

An alternative – although generally not recommended – is to escape your data manually:

$mysqli = new mysqli('localhost', 'user', 'pass', 'dbname');

$escaped = $mysqli->real_escape_string($taintedUserName);
$mysqli->query("SELECT * FROM users WHERE name = '".$escaped."'");

General Strategies to prevent injection

In general, it is advisable to prevent any user-data to reach this point. This can be done by white-listing certain values:

if ( ! in_array($value, array('this-is-allowed', 'and-this-too'), true)) {
    throw new \InvalidArgumentException('This input is not allowed.');
}

For numeric data, we recommend to explicitly cast the data:

$sanitized = (integer) $tainted;
Loading history...
385
        } catch (DriverException $e) {
386
            $this->handleDriverError($e, $timeout);
387
        }
388
    }
389
390
    /**
391
     * Execute a query using the projects connection, handling certain Exceptions.
392
     * @param QueryBuilder $qb
393
     * @param int|null $timeout Maximum statement time in seconds. null will use the
394
     *   default specified by the app.query_timeout config parameter.
395
     * @return ResultStatement
396
     * @throws HttpException
397
     * @throws DriverException
398
     * @codeCoverageIgnore
399
     */
400
    public function executeQueryBuilder(QueryBuilder $qb, ?int $timeout = null): ResultStatement
401
    {
402
        try {
403
            $timeout = $timeout ?? $this->container->getParameter('app.query_timeout');
404
            $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$qb->getSQL();
405
            return $qb->getConnection()->executeQuery($sql, $qb->getParameters(), $qb->getParameterTypes());
406
        } catch (DriverException $e) {
407
            $this->handleDriverError($e, $timeout);
408
        }
409
    }
410
411
    /**
412
     * Special handling of some DriverExceptions, otherwise original Exception is thrown.
413
     * @param DriverException $e
414
     * @param int|null $timeout Timeout value, if applicable. This is passed to the i18n message.
415
     * @throws HttpException
416
     * @throws DriverException
417
     * @codeCoverageIgnore
418
     */
419
    private function handleDriverError(DriverException $e, ?int $timeout): void
420
    {
421
        // If no value was passed for the $timeout, it must be the default.
422
        if (null === $timeout) {
423
            $timeout = $this->container->getParameter('app.query_timeout');
424
        }
425
426
        if (1226 === $e->getErrorCode()) {
427
            throw new ServiceUnavailableHttpException(30, 'error-service-overload', null, 503);
428
        } elseif (in_array($e->getErrorCode(), [1969, 2006, 2013])) {
429
            // FIXME: Attempt to reestablish connection on 2006 error (MySQL server has gone away).
430
            throw new HttpException(504, 'error-query-timeout', null, [], $timeout);
431
        } else {
432
            throw $e;
433
        }
434
    }
435
}
436