Test Failed
Pull Request — master (#377)
by MusikAnimal
39:18
created

Repository::getCacheKey()   A

Complexity

Conditions 6
Paths 12

Size

Total Lines 25
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 6

Importance

Changes 0
Metric Value
cc 6
eloc 10
nc 12
nop 2
dl 0
loc 25
rs 9.2222
c 0
b 0
f 0
ccs 8
cts 8
cp 1
crap 6
1
<?php
2
/**
3
 * This file contains only the Repository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Project;
11
use DateInterval;
12
use Doctrine\DBAL\Connection;
13
use Doctrine\DBAL\Driver\ResultStatement;
14
use Doctrine\DBAL\Exception\DriverException;
15
use Doctrine\DBAL\Query\QueryBuilder;
16
use GuzzleHttp\Client;
17
use Psr\Cache\CacheItemPoolInterface;
18
use Psr\Log\LoggerInterface;
19
use Psr\Log\NullLogger;
20
use Symfony\Component\DependencyInjection\ContainerInterface;
21
use Symfony\Component\HttpFoundation\Response;
22
use Symfony\Component\HttpKernel\Exception\HttpException;
23
use Symfony\Component\HttpKernel\Exception\ServiceUnavailableHttpException;
24
25
/**
26
 * A repository is responsible for retrieving data from wherever it lives (databases, APIs, filesystems, etc.)
27
 */
28
abstract class Repository
29
{
30
    /** @var ContainerInterface The application's DI container. */
    protected $container;

    /** @var Connection|null Connection to the meta database; created on first use by getMetaConnection(). */
    private $metaConnection;

    /** @var Connection|null Connection to other tools' databases; created on first use by getToolsConnection(). */
    private $toolsConnection;

    /** @var CacheItemPoolInterface The cache. */
    protected $cache;

    /** @var LoggerInterface The logger. */
    protected $log;

    /** @var string Prefix URL for where the dblists live. Will be followed by e.g. 's1.dblist' */
    public const DBLISTS_URL = 'https://noc.wikimedia.org/conf/dblists/';
50 18
51
    /**
     * Build a repository that only has a no-op logger.
     * The container, cache and real logger are supplied afterwards through setContainer().
     */
    public function __construct()
    {
        $silentLogger = new NullLogger();
        $this->log = $silentLogger;
    }
58
59 17
    /**
     * Inject the DI container and pull the cache pool and logger services out of it.
     * @param ContainerInterface $container Must provide the 'cache.app' and 'logger' services.
     */
    public function setContainer(ContainerInterface $container): void
    {
        $this->container = $container;

        $cachePool = $container->get('cache.app');
        $this->cache = $cachePool;

        $logger = $container->get('logger');
        $this->log = $logger;
    }
69
70
    /**
     * Whether XTools is configured to connect to WMF Labs.
     * Reads the 'app.is_labs' container parameter and coerces it to a boolean.
     * @return bool
     * @codeCoverageIgnore
     */
    public function isLabs(): bool
    {
        $labsFlag = $this->container->getParameter('app.is_labs');

        return (bool)$labsFlag;
    }
79
80
    /***************
81
     * CONNECTIONS *
82
     ***************/
83
84
    /**
     * Get the database connection for the 'meta' database.
     * The connection is resolved once through getProjectsConnection('meta') and then reused.
     * @return Connection
     * @codeCoverageIgnore
     */
    protected function getMetaConnection(): Connection
    {
        // Already resolved on an earlier call.
        if ($this->metaConnection instanceof Connection) {
            return $this->metaConnection;
        }

        $this->metaConnection = $this->getProjectsConnection('meta');

        return $this->metaConnection;
    }
96
97
    /**
     * Get a database connection for the given database.
     *
     * The slice is either given directly (e.g. 's1') or looked up in the dblists by database name.
     * The Doctrine connection named 'toolforge_<slice>' is then returned.
     *
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
     * @return Connection
     * @throws \InvalidArgumentException If $project is of an unsupported type, or its database is
     *   not present in the dblists.
     * @codeCoverageIgnore
     */
    protected function getProjectsConnection($project): Connection
    {
        if (is_string($project) && 1 === preg_match('/^s\d+$/', $project)) {
            // A slice was given directly.
            $slice = $project;
        } else {
            if (is_string($project)) {
                // Assume database name. Remove only a trailing '_p'; a '_p' elsewhere in the
                // name is part of the database name and must be preserved.
                $dbName = (string)preg_replace('/_p$/', '', $project);
            } elseif ($project instanceof Project) {
                $dbName = $project->getDatabaseName();
            } else {
                throw new \InvalidArgumentException('Invalid $project given');
            }

            $dbList = $this->getDbList();
            if (!isset($dbList[$dbName])) {
                // Fail with a clear message instead of asking Doctrine for a connection
                // with an empty slice name.
                throw new \InvalidArgumentException("Unable to determine the slice for database '$dbName'");
            }
            $slice = $dbList[$dbName];
        }

        return $this->container->get('doctrine')
            ->getConnection('toolforge_'.$slice);
    }
122
123
    /**
     * Get the database connection for the 'tools' database (the one that other tools store data in).
     * The connection is taken from Doctrine's 'toolsdb' manager on first use and then reused.
     * @return Connection
     * @codeCoverageIgnore
     */
    protected function getToolsConnection(): Connection
    {
        // Already resolved on an earlier call.
        if ($this->toolsConnection instanceof Connection) {
            return $this->toolsConnection;
        }

        $doctrine = $this->container->get('doctrine');
        $this->toolsConnection = $doctrine->getManager('toolsdb')->getConnection();

        return $this->toolsConnection;
    }
138 2
139
    /**
     * Fetch and concatenate all the dblists into one array.
     * Based on ToolforgeBundle https://github.com/wikimedia/ToolforgeBundle/blob/master/Service/ReplicasClient.php
     * License: GPL 3.0 or later
     *
     * The lists are requested as s1.dblist, s2.dblist, ... until the first one that does not
     * respond with 200 or 304. A successful result is cached for one week. If not even the first
     * list could be fetched, the result is returned uncached so the next call retries.
     *
     * @return string[] Keys are database names (i.e. 'enwiki'), values are the slices (i.e. 's1').
     */
    protected function getDbList(): array
    {
        $cacheKey = 'dblists';

        // Single lookup: checking hasItem() and then calling getItem() could miss if the
        // item expires between the two calls.
        $cacheItem = $this->cache->getItem($cacheKey);
        if ($cacheItem->isHit()) {
            return $cacheItem->get();
        }

        /** @var Client $client */
        $client = $this->container->get('eight_points_guzzle.client.xtools');

        $dbList = [];

        // The upper bound is a safeguard against looping forever.
        for ($i = 1; $i < 50; $i++) {
            $response = $client->request('GET', self::DBLISTS_URL."s$i.dblist", ['http_errors' => false]);
            $exists = in_array(
                $response->getStatusCode(),
                [Response::HTTP_OK, Response::HTTP_NOT_MODIFIED],
                true
            );

            if (!$exists) {
                break;
            }

            $lines = explode("\n", $response->getBody()->getContents());
            foreach ($lines as $line) {
                $line = trim($line);
                if (1 !== preg_match('/^#/', $line) && '' !== $line) {
                    // Skip comments and blank lines.
                    $dbList[$line] = "s$i";
                }
            }
        }

        $fetchedAny = [] !== $dbList;

        // Manually add the meta and centralauth databases.
        $dbList['meta'] = 's7';
        $dbList['centralauth'] = 's7';

        if (!$fetchedAny) {
            // Nothing could be retrieved; don't keep an almost-empty list in the cache for a week.
            $this->log->warning('Unable to fetch any dblists from '.self::DBLISTS_URL);
            return $dbList;
        }

        // Cache for one week.
        return $this->setCache($cacheKey, $dbList, 'P1W');
    }
188
189
    /*****************
190 1
     * QUERY HELPERS *
191
     *****************/
192
193
    /**
     * Make a GET request to the MediaWiki API of the given project.
     * @param Project $project Project whose API URL is queried.
     * @param array $params Query-string parameters. They are merged over the defaults
     *   'action' => 'query' and 'format' => 'json', so they may override either.
     * @return array The JSON response body decoded into an associative array.
     */
    public function executeApiRequest(Project $project, array $params): array
    {
        /** @var Client $client */
        $client = $this->container->get('eight_points_guzzle.client.xtools');

        $apiUrl = $project->getApiUrl();
        $defaults = [
            'action' => 'query',
            'format' => 'json',
        ];
        $query = array_merge($defaults, $params);

        $response = $client->request('GET', $apiUrl, ['query' => $query]);
        $body = $response->getBody()->getContents();

        return json_decode($body, true);
    }
211 8
212
    /**
     * Normalize and quote a table name for use in SQL.
     * @param string $databaseName
     * @param string $tableName
     * @param string|null $tableExtension Optional table extension, which will only get used if we're on labs.
     *   If null, table extensions are added as defined in table_map.yml. If a blank string, no extension is added.
     * @return string Fully-qualified and quoted table name.
     */
    public function getTableName(string $databaseName, string $tableName, ?string $tableExtension = null): string
    {
        $viaMapping = false;

        if ($this->isLabs() && null !== $tableExtension) {
            // Workaround for the one-to-many mapping required by Labs: the new table name is
            // the given table name combined with the extension (if it is not blank).
            $viaMapping = true;
            if ('' !== $tableExtension) {
                $tableName .= '_'.$tableExtension;
            }
        } else {
            $mappingParam = "app.table.$tableName";
            if ($this->container->hasParameter($mappingParam)) {
                // Use the table specified in the table mapping configuration.
                $viaMapping = true;
                $tableName = $this->container->getParameter($mappingParam);
            }
        }

        // Safeguard in case table mapping isn't properly set up: on Labs, the 'revision',
        // 'logging' and 'archive' tables (actually views) are replaced by their indexed versions
        // (that have some rows hidden, e.g. for revdeleted users).
        if (!$viaMapping
            && in_array($tableName, ['revision', 'logging', 'archive'])
            && $this->isLabs()
        ) {
            $tableName .= '_userindex';
        }

        // On Labs the database name carries a '_p' suffix; add it unless already there.
        if ($this->isLabs() && '_p' !== substr($databaseName, -2)) {
            $databaseName .= '_p';
        }

        return "`$databaseName`.`$tableName`";
    }
253
254
    /**
255
     * Get a unique cache key for the given list of arguments. Assuming each argument of
256 1
     * your function should be accounted for, you can pass in them all with func_get_args:
257
     *   $this->getCacheKey(func_get_args(), 'unique key for function');
258 1
     * Arguments that are a model should implement their own getCacheKey() that returns
259 1
     * a unique identifier for an instance of that model. See User::getCacheKey() for example.
260 1
     * @param array|mixed $args Array of arguments or a single argument.
261 1
     * @param string $key Unique key for this function. If omitted the function name itself
262 1
     *   is used, which is determined using `debug_backtrace`.
263 1
     * @return string
264
     */
265
    public function getCacheKey($args, $key = null): string
266
    {
267
        if (null === $key) {
268
            $key = debug_backtrace()[1]['function'];
269
        }
270
271
        if (!is_array($args)) {
272
            $args = [$args];
273
        }
274
275
        // Start with base key.
276
        $cacheKey = $key;
277
278 1
        // Loop through and determine what values to use based on type of object.
279
        foreach ($args as $arg) {
280 1
            // Zero is an acceptable value.
281 1
            if ('' === $arg || null === $arg) {
282
                continue;
283 1
            }
284 1
285
            $cacheKey .= $this->getCacheKeyFromArg($arg);
286 1
        }
287 1
288 1
        // Remove reserved characters.
289
        return preg_replace('/[{}()\/\@\:"]/', '', $cacheKey);
290
    }
291 1
292
    /**
     * Get a cache-friendly string given an argument.
     *
     * Objects exposing getCacheKey() contribute that key verbatim. Arrays contribute the MD5 of
     * their concatenated elements. Anything else contributes the MD5 of its string value.
     *
     * @param mixed $arg
     * @return string The key fragment, always prefixed with a dot.
     */
    private function getCacheKeyFromArg($arg): string
    {
        // method_exists() must only see objects: it throws a TypeError for ints, arrays and
        // booleans on PHP 8, and it interprets a string as a class name, which would lead
        // to calling getCacheKey() on a string.
        if (is_object($arg) && method_exists($arg, 'getCacheKey')) {
            return '.'.$arg->getCacheKey();
        }

        if (is_array($arg)) {
            // Assumed to be an array of objects that can be parsed into a string.
            return '.'.md5(implode('', $arg));
        }

        // Assumed to be a string, number or boolean.
        return '.'.md5((string)$arg);
    }
309
310
    /**
     * Store a value in the cache under the given key for the given duration.
     * @param string $cacheKey
     * @param mixed $value
     * @param string $duration Valid DateInterval string. Defaults to 20 minutes.
     * @return mixed The given $value, so the call can be used directly in a return statement.
     */
    public function setCache(string $cacheKey, $value, $duration = 'PT20M')
    {
        $item = $this->cache->getItem($cacheKey);
        $item->set($value);

        $lifetime = new DateInterval($duration);
        $item->expiresAfter($lifetime);

        $this->cache->save($item);

        return $value;
    }
326
327
    /********************************
328
     * DATABASE INTERACTION HELPERS *
329
     ********************************/
330
331
    /**
     * Creates WHERE conditions with date range to be put in query.
     *
     * Each bound is widened to whole days: the start becomes 00:00:00 of its day and the end
     * becomes 23:59:59 of its day, both formatted as YYYYMMDDHHMMSS.
     *
     * @param false|int $start Unix timestamp, or a falsy value for no lower bound.
     * @param false|int $end Unix timestamp, or a falsy value for no upper bound.
     * @param string $tableAlias Alias of table FOLLOWED BY DOT.
     * @param string $field
     * @return string Zero, one or two conditions, each starting with ' AND '.
     */
    public function getDateConditions($start, $end, $tableAlias = '', $field = 'rev_timestamp'): string
    {
        $column = $tableAlias.$field;
        $sql = '';

        // Loose truthiness on purpose: any falsy value (false, 0, ...) means "no bound".
        if ($start) {
            $sql .= sprintf(" AND %s >= '%s000000'", $column, date('Ymd', $start));
        }

        if ($end) {
            $sql .= sprintf(" AND %s <= '%s235959'", $column, date('Ymd', $end));
        }

        return $sql;
    }
354
355
    /**
     * Execute a query using the projects connection, handling certain Exceptions.
     *
     * The statement is prefixed with "SET STATEMENT max_statement_time = <timeout> FOR" so the
     * server enforces the timeout.
     *
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
     * @param string $sql
     * @param array $params Parameters to bound to the prepared query.
     * @param int|null $timeout Maximum statement time in seconds. null will use the
     *   default specified by the app.query_timeout config parameter.
     * @return ResultStatement
     * @throws HttpException
     * @throws DriverException
     * @throws \Doctrine\DBAL\DBALException
     * @codeCoverageIgnore
     */
    public function executeProjectsQuery(
        $project,
        string $sql,
        array $params = [],
        ?int $timeout = null
    ): ResultStatement {
        // Cast because the container parameter is not guaranteed to be an int, while
        // handleDriverError() requires one under strict typing.
        $timeout = (int)($timeout ?? $this->container->getParameter('app.query_timeout'));
        $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$sql;

        try {
            return $this->getProjectsConnection($project)->executeQuery($sql, $params);
        } catch (DriverException $e) {
            $this->handleDriverError($e, $timeout);

            // handleDriverError() always throws; this keeps every path explicit.
            throw $e;
        }
    }
382
383
    /**
     * Execute the query of the given QueryBuilder on its own connection, handling certain Exceptions.
     *
     * The statement is prefixed with "SET STATEMENT max_statement_time = <timeout> FOR" so the
     * server enforces the timeout.
     *
     * @param QueryBuilder $qb
     * @param int|null $timeout Maximum statement time in seconds. null will use the
     *   default specified by the app.query_timeout config parameter.
     * @return ResultStatement
     * @throws HttpException
     * @throws DriverException
     * @codeCoverageIgnore
     */
    public function executeQueryBuilder(QueryBuilder $qb, ?int $timeout = null): ResultStatement
    {
        // Cast because the container parameter is not guaranteed to be an int, while
        // handleDriverError() requires one under strict typing.
        $timeout = (int)($timeout ?? $this->container->getParameter('app.query_timeout'));
        $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$qb->getSQL();

        try {
            return $qb->getConnection()->executeQuery($sql, $qb->getParameters(), $qb->getParameterTypes());
        } catch (DriverException $e) {
            $this->handleDriverError($e, $timeout);

            // handleDriverError() always throws; this keeps every path explicit.
            throw $e;
        }
    }
403
404
    /**
     * Special handling of some DriverExceptions, otherwise original Exception is thrown.
     * This method never returns normally.
     *
     * @param DriverException $e
     * @param int $timeout Timeout value, if applicable. This is passed to the i18n message
     *   (as the code of the HttpException).
     * @throws HttpException For error 1226 (reported as 503 'error-service-overload') and for
     *   errors 1969, 2006 and 2013 (reported as 504 'error-query-timeout').
     * @throws DriverException For every other error code.
     * @codeCoverageIgnore
     */
    private function handleDriverError(DriverException $e, int $timeout): void
    {
        $errorCode = $e->getErrorCode();

        // NOTE(review): 1226 is presumably the server's per-user resource limit error.
        if (1226 === $errorCode) {
            throw new ServiceUnavailableHttpException(30, 'error-service-overload', null, 503);
        }

        // Deliberately a loose comparison, as in the original check, so that codes reported
        // as numeric strings still match.
        if (in_array($errorCode, [1969, 2006, 2013])) {
            // FIXME: Attempt to reestablish connection on 2006 error (MySQL server has gone away).
            throw new HttpException(504, 'error-query-timeout', null, [], $timeout);
        }

        throw $e;
    }
428
}
429