Repository::getCacheKeyFromArg()   A
last analyzed

Complexity

Conditions 4
Paths 3

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 6
nc 3
nop 1
dl 0
loc 10
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Repository;
6
7
use App\Exception\BadGatewayException;
8
use App\Model\Project;
9
use DateInterval;
10
use Doctrine\DBAL\Connection;
11
use Doctrine\DBAL\Driver\ResultStatement;
12
use Doctrine\DBAL\Exception\DriverException;
13
use Doctrine\DBAL\Query\QueryBuilder;
14
use Doctrine\Persistence\ManagerRegistry;
15
use GuzzleHttp\Client;
16
use GuzzleHttp\Exception\ConnectException;
17
use GuzzleHttp\Exception\ServerException;
18
use Psr\Cache\CacheItemPoolInterface;
19
use Psr\Log\LoggerInterface;
20
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
21
use Symfony\Component\HttpFoundation\Response;
22
use Symfony\Component\HttpKernel\Exception\HttpException;
23
use Symfony\Component\HttpKernel\Exception\ServiceUnavailableHttpException;
24
25
/**
26
 * A repository is responsible for retrieving data from wherever it lives (databases, APIs, filesystems, etc.)
27
 */
28
abstract class Repository
29
{
30
    protected CacheItemPoolInterface $cache;
31
    protected Client $guzzle;
32
    protected LoggerInterface $logger;
33
    protected ManagerRegistry $managerRegistry;
34
    protected ParameterBagInterface $parameterBag;
35
36
    /** @var Connection The database connection to the meta database. */
37
    private Connection $metaConnection;
38
39
    /** @var Connection The database connection to other tools' databases.  */
40
    private Connection $toolsConnection;
41
42
    /** @var bool Whether this is configured as a WMF installation. */
43
    protected bool $isWMF;
44
45
    /** @var int */
46
    protected int $queryTimeout;
47
48
    /** @var string Prefix URL for where the dblists live. Will be followed by i.e. 's1.dblist' */
49
    public const DBLISTS_URL = 'https://noc.wikimedia.org/conf/dblists/';
50
51
    /**
52
     * Create a new Repository.
53
     * @param ManagerRegistry $managerRegistry
54
     * @param Client $guzzle
55
     * @param LoggerInterface $logger
56
     * @param ParameterBagInterface $parameterBag
57
     * @param bool $isWMF
58
     * @param int $queryTimeout
59
     */
60
    public function __construct(
61
        ManagerRegistry $managerRegistry,
62
        CacheItemPoolInterface $cache,
63
        Client $guzzle,
64
        LoggerInterface $logger,
65
        ParameterBagInterface $parameterBag,
66
        bool $isWMF,
67
        int $queryTimeout
68
    ) {
69
        $this->managerRegistry = $managerRegistry;
70
        $this->cache = $cache;
71
        $this->guzzle = $guzzle;
72
        $this->logger = $logger;
73
        $this->parameterBag = $parameterBag;
74
        $this->isWMF = $isWMF;
75
        $this->queryTimeout = $queryTimeout;
76
    }
77
78
    /***************
79
     * CONNECTIONS *
80
     ***************/
81
82
    /**
83
     * Get the database connection for the 'meta' database.
84
     * @return Connection
85
     * @codeCoverageIgnore
86
     */
87
    protected function getMetaConnection(): Connection
88
    {
89
        if (!isset($this->metaConnection)) {
90
            $this->metaConnection = $this->getProjectsConnection('meta');
91
        }
92
        return $this->metaConnection;
93
    }
94
95
    /**
96
     * Get a database connection for the given database.
97
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
98
     * @return Connection
99
     * @codeCoverageIgnore
100
     */
101
    protected function getProjectsConnection($project): Connection
102
    {
103
        if (is_string($project)) {
104
            if (1 === preg_match('/^s\d+$/', $project)) {
105
                $slice = $project;
106
            } else {
107
                // Assume database name. Remove _p if given.
108
                $db = str_replace('_p', '', $project);
109
                $slice = $this->getDbList()[$db];
110
            }
111
        } else {
112
            $slice = $this->getDbList()[$project->getDatabaseName()];
113
        }
114
115
        return $this->managerRegistry->getConnection('toolforge_'.$slice);
116
    }
117
118
    /**
119
     * Get the database connection for the 'tools' database (the one that other tools store data in).
120
     * @return Connection
121
     * @codeCoverageIgnore
122
     */
123
    protected function getToolsConnection(): Connection
124
    {
125
        if (!isset($this->toolsConnection)) {
126
            $this->toolsConnection = $this->managerRegistry->getConnection('toolsdb');
127
        }
128
        return $this->toolsConnection;
129
    }
130
131
    /**
132
     * Fetch and concatenate all the dblists into one array.
133
     * Based on ToolforgeBundle https://github.com/wikimedia/ToolforgeBundle/blob/master/Service/ReplicasClient.php
134
     * License: GPL 3.0 or later
135
     * @return string[] Keys are database names (i.e. 'enwiki'), values are the slices (i.e. 's1').
136
     * @codeCoverageIgnore
137
     */
138
    protected function getDbList(): array
139
    {
140
        $cacheKey = 'dblists';
141
        if ($this->cache->hasItem($cacheKey)) {
142
            return $this->cache->getItem($cacheKey)->get();
143
        }
144
145
        $dbList = [];
146
        $exists = true;
147
        $i = 0;
148
149
        while ($exists) {
150
            $i += 1;
151
            $response = $this->guzzle->request('GET', self::DBLISTS_URL."s$i.dblist", ['http_errors' => false]);
152
            $exists = in_array(
153
                $response->getStatusCode(),
154
                [Response::HTTP_OK, Response::HTTP_NOT_MODIFIED]
155
            ) && $i < 50; // Safeguard
156
157
            if (!$exists) {
158
                break;
159
            }
160
161
            $lines = explode("\n", $response->getBody()->getContents());
162
            foreach ($lines as $line) {
163
                $line = trim($line);
164
                if (1 !== preg_match('/^#/', $line) && '' !== $line) {
165
                    // Skip comments and blank lines.
166
                    $dbList[$line] = "s$i";
167
                }
168
            }
169
        }
170
171
        // Manually add the meta and centralauth databases.
172
        $dbList['meta'] = 's7';
173
        $dbList['centralauth'] = 's7';
174
175
        // Cache for one week.
176
        return $this->setCache($cacheKey, $dbList, 'P1W');
177
    }
178
179
    /*****************
180
     * QUERY HELPERS *
181
     *****************/
182
183
    /**
184
     * Make a request to the MediaWiki API.
185
     * @param Project $project
186
     * @param array $params
187
     * @return array
188
     * @throws BadGatewayException
189
     */
190
    public function executeApiRequest(Project $project, array $params): array
191
    {
192
        try {
193
            return json_decode($this->guzzle->request('GET', $project->getApiUrl(), [
194
                'query' => array_merge([
195
                    'action' => 'query',
196
                    'format' => 'json',
197
                ], $params),
198
            ])->getBody()->getContents(), true);
199
        } catch (ServerException|ConnectException $e) {
200
            throw new BadGatewayException('api-error-wikimedia', ['Wikimedia'], $e);
201
        }
202
    }
203
204
    /**
205
     * Normalize and quote a table name for use in SQL.
206
     * @param string $databaseName
207
     * @param string $tableName
208
     * @param string|null $tableExtension Optional table extension, which will only get used if we're on labs.
209
     *   If null, table extensions are added as defined in table_map.yml. If a blank string, no extension is added.
210
     * @return string Fully-qualified and quoted table name.
211
     */
212
    public function getTableName(string $databaseName, string $tableName, ?string $tableExtension = null): string
213
    {
214
        $mapped = false;
215
216
        // This is a workaround for a one-to-many mapping
217
        // as required by Labs. We combine $tableName with
218
        // $tableExtension in order to generate the new table name
219
        if ($this->isWMF && null !== $tableExtension) {
220
            $mapped = true;
221
            $tableName .=('' === $tableExtension ? '' : '_'.$tableExtension);
222
        } elseif ($this->parameterBag->has("app.table.$tableName")) {
223
            // Use the table specified in the table mapping configuration, if present.
224
            $mapped = true;
225
            $tableName = $this->parameterBag->get("app.table.$tableName");
226
        }
227
228
        // For 'revision' and 'logging' tables (actually views) on Labs, use the indexed versions
229
        // (that have some rows hidden, e.g. for revdeleted users).
230
        // This is a safeguard in case table mapping isn't properly set up.
231
        $isLoggingOrRevision = in_array($tableName, ['revision', 'logging', 'archive']);
232
        if (!$mapped && $isLoggingOrRevision && $this->isWMF) {
233
            $tableName .="_userindex";
234
        }
235
236
        // Figure out database name.
237
        // Use class variable for the database name if not set via function parameter.
238
        if ($this->isWMF && '_p' != substr($databaseName, -2)) {
239
            // Append '_p' if this is labs.
240
            $databaseName .= '_p';
241
        }
242
243
        return "`$databaseName`.`$tableName`";
244
    }
245
246
    /**
247
     * Get a unique cache key for the given list of arguments. Assuming each argument of
248
     * your function should be accounted for, you can pass in them all with func_get_args:
249
     *   $this->getCacheKey(func_get_args(), 'unique key for function');
250
     * Arguments that are a model should implement their own getCacheKey() that returns
251
     * a unique identifier for an instance of that model. See User::getCacheKey() for example.
252
     * @param array|mixed $args Array of arguments or a single argument.
253
     * @param string|null $key Unique key for this function. If omitted the function name itself
254
     *   is used, which is determined using `debug_backtrace`.
255
     * @return string
256
     */
257
    public function getCacheKey($args, ?string $key = null): string
258
    {
259
        if (null === $key) {
260
            $key = debug_backtrace()[1]['function'];
261
        }
262
263
        if (!is_array($args)) {
264
            $args = [$args];
265
        }
266
267
        // Start with base key.
268
        $cacheKey = $key;
269
270
        // Loop through and determine what values to use based on type of object.
271
        foreach ($args as $arg) {
272
            // Zero is an acceptable value.
273
            if ('' === $arg || null === $arg) {
274
                continue;
275
            }
276
277
            $cacheKey .= $this->getCacheKeyFromArg($arg);
278
        }
279
280
        // Remove reserved characters.
281
        return preg_replace('/[{}()\/@:"]/', '', $cacheKey);
282
    }
283
284
    /**
285
     * Get a cache-friendly string given an argument.
286
     * @param mixed $arg
287
     * @return string
288
     */
289
    private function getCacheKeyFromArg($arg): string
290
    {
291
        if (is_object($arg) && method_exists($arg, 'getCacheKey')) {
292
            return '.'.$arg->getCacheKey();
293
        } elseif (is_array($arg)) {
294
            // Assumed to be an array of objects that can be parsed into a string.
295
            return '.'.md5(implode('', $arg));
296
        } else {
297
            // Assumed to be a string, number or boolean.
298
            return '.'.md5((string)$arg);
299
        }
300
    }
301
302
    /**
303
     * Set the cache with given options.
304
     * @param string $cacheKey
305
     * @param mixed $value
306
     * @param string $duration Valid DateInterval string.
307
     * @return mixed The given $value.
308
     */
309
    public function setCache(string $cacheKey, $value, string $duration = 'PT20M')
310
    {
311
        $cacheItem = $this->cache
312
            ->getItem($cacheKey)
313
            ->set($value)
314
            ->expiresAfter(new DateInterval($duration));
315
        $this->cache->save($cacheItem);
316
        return $value;
317
    }
318
319
    /********************************
320
     * DATABASE INTERACTION HELPERS *
321
     ********************************/
322
323
    /**
324
     * Creates WHERE conditions with date range to be put in query.
325
     * @param false|int $start Unix timestamp.
326
     * @param false|int $end Unix timestamp.
327
     * @param false|int $offset Unix timestamp. Used for pagination, will end up replacing $end.
328
     * @param string $tableAlias Alias of table FOLLOWED BY DOT.
329
     * @param string $field
330
     * @return string
331
     */
332
    public function getDateConditions(
333
        $start,
334
        $end,
335
        $offset = false,
336
        string $tableAlias = '',
337
        string $field = 'rev_timestamp'
338
    ) : string {
339
        $datesConditions = '';
340
341
        if (is_int($start)) {
342
            // Convert to YYYYMMDDHHMMSS.
343
            $start = date('Ymd', $start).'000000';
344
            $datesConditions .= " AND $tableAlias{$field} >= '$start'";
345
        }
346
347
        // When we're given an $offset, it basically replaces $end, except it's also a full timestamp.
348
        if (is_int($offset)) {
349
            $offset = date('YmdHis', $offset);
350
            $datesConditions .= " AND $tableAlias{$field} <= '$offset'";
351
        } elseif (is_int($end)) {
352
            $end = date('Ymd', $end) . '235959';
353
            $datesConditions .= " AND $tableAlias{$field} <= '$end'";
354
        }
355
356
        return $datesConditions;
357
    }
358
359
    /**
360
     * Execute a query using the projects connection, handling certain Exceptions.
361
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
362
     * @param string $sql
363
     * @param array $params Parameters to bound to the prepared query.
364
     * @param int|null $timeout Maximum statement time in seconds. null will use the
365
     *   default specified by the APP_QUERY_TIMEOUT env variable.
366
     * @return ResultStatement
367
     * @throws DriverException
368
     * @codeCoverageIgnore
369
     */
370
    public function executeProjectsQuery(
371
        $project,
372
        string $sql,
373
        array $params = [],
374
        ?int $timeout = null
375
    ): ResultStatement {
376
        try {
377
            $timeout = $timeout ?? $this->queryTimeout;
378
            $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$sql;
379
380
            return $this->getProjectsConnection($project)->executeQuery($sql, $params);
0 ignored issues
show
Security SQL Injection introduced by
$params can contain request data and is used in sql context(s) leading to a potential security vulnerability.

1 path for user data to reach this point

  1. ParameterBag::get() returns request data
    in vendor/symfony/http-foundation/ParameterBag.php on line 90
  2. $this->attributes->get($key, $this) is assigned to $result
    in vendor/symfony/http-foundation/Request.php on line 711
  3. $result is returned
    in vendor/symfony/http-foundation/Request.php on line 712
  4. LargestPages::__construct() is called
    in src/Controller/LargestPagesController.php on line 68
  5. Enters via parameter $excludePattern
    in src/Model/LargestPages.php on line 35
  6. $excludePattern is assigned to property LargestPages::$excludePattern
    in src/Model/LargestPages.php on line 41
  7. Read from property LargestPages::$excludePattern, and LargestPagesRepository::getData() is called
    in src/Model/LargestPages.php on line 72
  8. Enters via parameter $excludePattern
    in src/Repository/LargestPagesRepository.php on line 79
  9. Repository::executeProjectsQuery() is called
    in src/Repository/LargestPagesRepository.php on line 103
  10. Enters via parameter $params
    in src/Repository/Repository.php on line 373

Used in sql context

  1. Connection::executeQuery() is called
    in src/Repository/Repository.php on line 371
  2. Enters via parameter $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1284
  3. Data is passed through expandListParameters()
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1299
  4. Doctrine\DBAL\SQLParserUtils::expandListParameters($sql, $params, $types) is assigned to $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1299
  5. DB2Statement::execute() is called
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Connection.php on line 1306
  6. Enters via parameter $params
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Driver/IBMDB2/DB2Statement.php on line 216
  7. db2_execute() is called
    in vendor/doctrine/dbal/lib/Doctrine/DBAL/Driver/IBMDB2/DB2Statement.php on line 238

Preventing SQL Injection

There are two options to prevent SQL injection. Generally, it is recommended to use parameter binding:

$stmt = mysqli_prepare("SELECT * FROM users WHERE name = ?");
$stmt->bind_param("s", $taintedUserName);

An alternative – although generally not recommended – is to escape your data manually:

$mysqli = new mysqli('localhost', 'user', 'pass', 'dbname');

$escaped = $mysqli->real_escape_string($taintedUserName);
$mysqli->query("SELECT * FROM users WHERE name = '".$escaped."'");

General Strategies to prevent injection

In general, it is advisable to prevent any user-data to reach this point. This can be done by white-listing certain values:

if ( ! in_array($value, array('this-is-allowed', 'and-this-too'), true)) {
    throw new \InvalidArgumentException('This input is not allowed.');
}

For numeric data, we recommend to explicitly cast the data:

$sanitized = (integer) $tainted;
Loading history...
381
        } catch (DriverException $e) {
382
            $this->handleDriverError($e, $timeout);
383
        }
384
    }
385
386
    /**
387
     * Execute a query using the projects connection, handling certain Exceptions.
388
     * @param QueryBuilder $qb
389
     * @param int|null $timeout Maximum statement time in seconds. null will use the
390
     *   default specified by the APP_QUERY_TIMEOUT env variable.
391
     * @return ResultStatement
392
     * @throws HttpException
393
     * @throws DriverException
394
     * @codeCoverageIgnore
395
     */
396
    public function executeQueryBuilder(QueryBuilder $qb, ?int $timeout = null): ResultStatement
397
    {
398
        try {
399
            $timeout = $timeout ?? $this->queryTimeout;
400
            $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$qb->getSQL();
401
            return $qb->getConnection()->executeQuery($sql, $qb->getParameters(), $qb->getParameterTypes());
402
        } catch (DriverException $e) {
403
            $this->handleDriverError($e, $timeout);
404
        }
405
    }
406
407
    /**
408
     * Special handling of some DriverExceptions, otherwise original Exception is thrown.
409
     * @param DriverException $e
410
     * @param int|null $timeout Timeout value, if applicable. This is passed to the i18n message.
411
     * @throws HttpException
412
     * @throws DriverException
413
     * @codeCoverageIgnore
414
     */
415
    private function handleDriverError(DriverException $e, ?int $timeout): void
416
    {
417
        // If no value was passed for the $timeout, it must be the default.
418
        if (null === $timeout) {
419
            $timeout = $this->queryTimeout;
420
        }
421
422
        if (1226 === $e->getErrorCode()) {
423
            throw new ServiceUnavailableHttpException(30, 'error-service-overload', null, 503);
424
        } elseif (in_array($e->getErrorCode(), [1969, 2006, 2013])) {
425
            // FIXME: Attempt to reestablish connection on 2006 error (MySQL server has gone away).
426
            throw new HttpException(Response::HTTP_GATEWAY_TIMEOUT, 'error-query-timeout', null, [], $timeout);
427
        } else {
428
            throw $e;
429
        }
430
    }
431
}
432