Passed
Branch master (0917e1)
by MusikAnimal
11:12
created

Repository   B

Complexity

Total Complexity 51

Size/Duplication

Total Lines 409
Duplicated Lines 0 %

Test Coverage

Coverage 65.56%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 128
dl 0
loc 409
ccs 59
cts 90
cp 0.6556
rs 7.92
c 1
b 0
f 0
wmc 51

16 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A setContainer() 0 5 1
A isLabs() 0 3 1
A getCacheKey() 0 25 6
A getProjectsConnection() 0 16 3
B getDbList() 0 42 8
A executeApiRequest() 0 11 1
A getToolsConnection() 0 9 2
A getCacheKeyFromArg() 0 10 3
A getMetaConnection() 0 6 2
A setCache() 0 8 1
B getTableName() 0 32 10
A handleDriverError() 0 14 4
A executeProjectsQuery() 0 13 2
A executeQueryBuilder() 0 8 2
A getDateConditions() 0 29 4

How to fix   Complexity   

Complex Class

Complex classes like Repository often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Repository, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * This file contains only the Repository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Project;
11
use DateInterval;
12
use Doctrine\DBAL\Connection;
13
use Doctrine\DBAL\Driver\ResultStatement;
14
use Doctrine\DBAL\Exception\DriverException;
15
use Doctrine\DBAL\Query\QueryBuilder;
16
use GuzzleHttp\Client;
17
use Psr\Cache\CacheItemPoolInterface;
18
use Psr\Log\LoggerInterface;
19
use Psr\Log\NullLogger;
20
use Symfony\Component\DependencyInjection\ContainerInterface;
21
use Symfony\Component\HttpFoundation\Response;
22
use Symfony\Component\HttpKernel\Exception\HttpException;
23
use Symfony\Component\HttpKernel\Exception\ServiceUnavailableHttpException;
24
25
/**
26
 * A repository is responsible for retrieving data from wherever it lives (databases, APIs, filesystems, etc.)
27
 */
28
abstract class Repository
29
{
30
    /** @var ContainerInterface The application's DI container. Assigned by setContainer(), not the constructor. */
    protected $container;

    /**
     * @var Connection|null The database connection to the meta database.
     *   Null until getMetaConnection() creates it on first use.
     */
    private $metaConnection;

    /**
     * @var Connection|null The database connection to other tools' databases.
     *   Null until getToolsConnection() creates it on first use.
     */
    private $toolsConnection;

    /** @var CacheItemPoolInterface The cache. Assigned by setContainer(), not the constructor. */
    protected $cache;

    /** @var LoggerInterface The logger. A NullLogger until setContainer() replaces it. */
    protected $log;

    /** @var string Prefix URL for where the dblists live. Will be followed by e.g. 's1.dblist' */
    public const DBLISTS_URL = 'https://noc.wikimedia.org/conf/dblists/';
47
48
    /**
     * Build a Repository that logs to a NullLogger.
     * The container, cache and real logger are supplied later through setContainer().
     */
    public function __construct()
    {
        $this->log = new NullLogger();
    }
55
56
    /**
     * Store the DI container, and take the cache pool and logger from it.
     * @param ContainerInterface $container Must provide the 'cache.app' and 'logger' services.
     */
    public function setContainer(ContainerInterface $container): void
    {
        $this->container = $container;

        $cachePool = $container->get('cache.app');
        $this->cache = $cachePool;

        $logger = $container->get('logger');
        $this->log = $logger;
    }
66
67
    /**
     * Whether XTools is configured to connect to WMF Labs.
     * @return bool The 'app.is_labs' container parameter, cast to a boolean.
     * @codeCoverageIgnore
     */
    public function isLabs(): bool
    {
        $isLabs = $this->container->getParameter('app.is_labs');

        return (bool)$isLabs;
    }
76
77
    /***************
78
     * CONNECTIONS *
79
     ***************/
80
81
    /**
     * Get the database connection for the 'meta' database, creating it on first use.
     * @return Connection
     * @codeCoverageIgnore
     */
    protected function getMetaConnection(): Connection
    {
        // Reuse the connection if an earlier call already created it.
        if ($this->metaConnection instanceof Connection) {
            return $this->metaConnection;
        }

        $this->metaConnection = $this->getProjectsConnection('meta');

        return $this->metaConnection;
    }
93
94
    /**
     * Get a database connection for the given database.
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
     * @return Connection The Doctrine connection named 'toolforge_' followed by the slice.
     * @throws \InvalidArgumentException If the database name is not present in the dblists.
     * @codeCoverageIgnore
     */
    protected function getProjectsConnection($project): Connection
    {
        if (is_string($project) && 1 === preg_match('/^s\d+$/', $project)) {
            // Already a slice name, nothing to look up.
            $slice = $project;
        } else {
            if (is_string($project)) {
                // Assume database name. Remove the '_p' suffix if given. Only the suffix is
                // removed, so a '_p' elsewhere in the name is left untouched.
                $db = '_p' === substr($project, -2) ? substr($project, 0, -2) : $project;
            } else {
                $db = $project->getDatabaseName();
            }

            $dbList = $this->getDbList();
            if (!isset($dbList[$db])) {
                // Fail with a clear message rather than asking Doctrine for a 'toolforge_' connection.
                throw new \InvalidArgumentException("Unable to determine the database slice for '$db'.");
            }
            $slice = $dbList[$db];
        }

        return $this->container->get('doctrine')
            ->getConnection('toolforge_'.$slice);
    }
117
118
    /**
     * Get the database connection for the 'tools' database (the one that other tools store data in),
     * creating it on first use.
     * @return Connection
     * @codeCoverageIgnore
     */
    protected function getToolsConnection(): Connection
    {
        // Reuse the connection if an earlier call already created it.
        if ($this->toolsConnection instanceof Connection) {
            return $this->toolsConnection;
        }

        $doctrine = $this->container->get('doctrine');
        $this->toolsConnection = $doctrine->getManager('toolsdb')->getConnection();

        return $this->toolsConnection;
    }
133
134
    /**
     * Fetch and concatenate all the dblists into one array.
     * Based on ToolforgeBundle https://github.com/wikimedia/ToolforgeBundle/blob/master/Service/ReplicasClient.php
     * License: GPL 3.0 or later
     *
     * The lists are requested one slice at a time (s1.dblist, s2.dblist, ...) until a request does not
     * succeed. The combined map is cached for one week, but only when at least one dblist could be
     * retrieved, so that a failed first request does not leave a near-empty map in the cache.
     * @return string[] Keys are database names (i.e. 'enwiki'), values are the slices (i.e. 's1').
     */
    protected function getDbList(): array
    {
        $cacheKey = 'dblists';
        if ($this->cache->hasItem($cacheKey)) {
            return $this->cache->getItem($cacheKey)->get();
        }

        /** @var Client $client */
        $client = $this->container->get('eight_points_guzzle.client.xtools');

        $dbList = [];
        $fetched = 0;

        // Slices are numbered from 1. The upper bound is a safeguard against looping forever.
        for ($i = 1; $i < 50; $i++) {
            $response = $client->request('GET', self::DBLISTS_URL."s$i.dblist", ['http_errors' => false]);
            $found = in_array(
                $response->getStatusCode(),
                [Response::HTTP_OK, Response::HTTP_NOT_MODIFIED],
                true
            );

            if (!$found) {
                break;
            }

            $fetched++;
            foreach (explode("\n", $response->getBody()->getContents()) as $line) {
                $line = trim($line);
                if ('' === $line || '#' === $line[0]) {
                    // Skip comments and blank lines.
                    continue;
                }
                $dbList[$line] = "s$i";
            }
        }

        // Manually add the meta and centralauth databases.
        $dbList['meta'] = 's7';
        $dbList['centralauth'] = 's7';

        if (0 === $fetched) {
            // Nothing could be retrieved. Return what we have without caching it,
            // so that the next call tries again instead of reusing this result for a week.
            $this->log->warning('Unable to retrieve any dblists from '.self::DBLISTS_URL);
            return $dbList;
        }

        // Cache for one week.
        return $this->setCache($cacheKey, $dbList, 'P1W');
    }
183
184
    /*****************
185
     * QUERY HELPERS *
186
     *****************/
187
188
    /**
     * Make a GET request to the MediaWiki API of the given project.
     * @param Project $project The project whose API URL is requested.
     * @param array $params Query parameters. These are merged over the defaults
     *   'action' => 'query' and 'format' => 'json', so either can be overridden.
     * @return array The decoded JSON response.
     * @throws \UnexpectedValueException If the response body does not decode to an array.
     */
    public function executeApiRequest(Project $project, array $params): array
    {
        /** @var Client $client */
        $client = $this->container->get('eight_points_guzzle.client.xtools');

        $query = array_merge([
            'action' => 'query',
            'format' => 'json',
        ], $params);

        $body = $client->request('GET', $project->getApiUrl(), ['query' => $query])
            ->getBody()
            ->getContents();

        $decoded = json_decode($body, true);

        // json_decode() gives null for malformed JSON, and a scalar for a scalar document.
        // Neither satisfies the declared return type, so report the problem explicitly.
        if (!is_array($decoded)) {
            throw new \UnexpectedValueException(
                'Unable to decode the MediaWiki API response: '.json_last_error_msg()
            );
        }

        return $decoded;
    }
206
207
    /**
     * Normalize and quote a table name for use in SQL.
     * @param string $databaseName Database name; on labs '_p' is appended unless it already ends with it.
     * @param string $tableName
     * @param string|null $tableExtension Optional table extension, which will only get used if we're on labs.
     *   If null, table extensions are added as defined in table_map.yml. If a blank string, no extension is added.
     * @return string Fully-qualified and quoted table name.
     */
    public function getTableName(string $databaseName, string $tableName, ?string $tableExtension = null): string
    {
        $hasMapping = false;

        if ($this->isLabs() && null !== $tableExtension) {
            // Workaround for the one-to-many mapping required by Labs:
            // the new table name is $tableName combined with $tableExtension.
            $hasMapping = true;
            if ('' !== $tableExtension) {
                $tableName .= '_'.$tableExtension;
            }
        } elseif ($this->container->hasParameter("app.table.$tableName")) {
            // Use the table specified in the table mapping configuration, if present.
            $hasMapping = true;
            $tableName = $this->container->getParameter("app.table.$tableName");
        }

        // Safeguard in case table mapping isn't properly set up: for the 'revision', 'logging' and
        // 'archive' tables (actually views) on Labs, use the indexed versions
        // (that have some rows hidden, e.g. for revdeleted users).
        if (!$hasMapping) {
            $hasIndexedView = in_array($tableName, ['revision', 'logging', 'archive'], true);
            if ($hasIndexedView && $this->isLabs()) {
                $tableName .= '_userindex';
            }
        }

        // Append '_p' to the database name if this is labs and it isn't there already.
        if ($this->isLabs() && '_p' !== substr($databaseName, -2)) {
            $databaseName .= '_p';
        }

        return sprintf('`%s`.`%s`', $databaseName, $tableName);
    }
248
249
    /**
     * Get a unique cache key for the given list of arguments. Assuming each argument of
     * your function should be accounted for, you can pass in them all with func_get_args:
     *   $this->getCacheKey(func_get_args(), 'unique key for function');
     * Arguments that are a model should implement their own getCacheKey() that returns
     * a unique identifier for an instance of that model. See User::getCacheKey() for example.
     * Arguments that are null or an empty string do not contribute to the key.
     * @param array|mixed $args Array of arguments or a single argument.
     * @param string $key Unique key for this function. If omitted the function name itself
     *   is used, which is determined using `debug_backtrace`.
     * @return string The key, with the characters {}()/\@:" removed.
     */
    public function getCacheKey($args, $key = null): string
    {
        if (null === $key) {
            // Frame 0 is this method and frame 1 is the caller. Only the caller's name is
            // needed, so skip collecting arguments and any deeper frames.
            $key = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 2)[1]['function'];
        }

        if (!is_array($args)) {
            $args = [$args];
        }

        // Start with base key.
        $cacheKey = $key;

        // Loop through and determine what values to use based on type of object.
        foreach ($args as $arg) {
            // Zero is an acceptable value.
            if ('' === $arg || null === $arg) {
                continue;
            }

            $cacheKey .= $this->getCacheKeyFromArg($arg);
        }

        // Remove reserved characters. The four backslashes in the PHP string form an escaped
        // backslash in the pattern, so that a literal backslash is removed as well.
        return preg_replace('/[{}()\/\\\\@:"]/', '', $cacheKey);
    }
286
287
    /**
     * Get a cache-friendly string given an argument.
     * @param mixed $arg An object with a getCacheKey() method, an array, or a value castable to string.
     * @return string A dot followed by the object's own cache key, or by an MD5 hash of the value.
     */
    private function getCacheKeyFromArg($arg): string
    {
        // Only consult getCacheKey() on real objects. method_exists() also accepts class names,
        // so a string that happens to name such a class would otherwise lead to a method call on
        // a string. Newer PHP versions additionally reject non-object, non-string arguments.
        if (is_object($arg) && method_exists($arg, 'getCacheKey')) {
            return '.'.$arg->getCacheKey();
        }

        if (is_array($arg)) {
            // Assumed to be an array of values that can be parsed into a string.
            return '.'.md5(implode('', $arg));
        }

        // Assumed to be a string, number or boolean.
        return '.'.md5((string)$arg);
    }
304
305
    /**
     * Store a value in the cache under the given key, for the given duration.
     * @param string $cacheKey
     * @param mixed $value
     * @param string $duration Valid DateInterval string. Defaults to 20 minutes.
     * @return mixed The given $value.
     */
    public function setCache(string $cacheKey, $value, $duration = 'PT20M')
    {
        $item = $this->cache->getItem($cacheKey)->set($value);
        $item = $item->expiresAfter(new DateInterval($duration));

        $this->cache->save($item);

        return $value;
    }
321
322
    /********************************
323
     * DATABASE INTERACTION HELPERS *
324
     ********************************/
325
326
    /**
     * Build the date range portion of a WHERE clause.
     * Each condition starts with ' AND', so the result can be appended to existing conditions.
     * Arguments that are not integers are ignored.
     * @param false|int $start Unix timestamp. Compared (>=) from the start of that day.
     * @param false|int $end Unix timestamp. Compared (<=) up to the end of that day.
     * @param false|int $offset Unix timestamp. Used for pagination, will end up replacing $end.
     * @param string $tableAlias Alias of table FOLLOWED BY DOT.
     * @param string $field
     * @return string The conditions, or an empty string if none apply.
     */
    public function getDateConditions(
        $start,
        $end,
        $offset = false,
        string $tableAlias = '',
        string $field = 'rev_timestamp'
    ) : string {
        $conditions = [];

        if (is_int($start)) {
            // Convert to YYYYMMDDHHMMSS.
            $start = date('Ymd', $start).'000000';
            $conditions[] = " AND {$tableAlias}{$field} >= '$start'";
        }

        // An $offset basically replaces $end, except that it is a full timestamp. For pagination
        // purposes it is compared with < instead of <= to prevent the last edit from the previous
        // page being shown as the first on the next page. This matches MediaWiki behavior, which
        // suggests it's not possible for two edits to be made in the same second (???).
        // FIXME: For Global Contribs it's possible edits are made at the same second on different wikis.
        if (is_int($offset)) {
            $offset = date('YmdHis', $offset);
            $conditions[] = " AND {$tableAlias}{$field} < '$offset'";
        } elseif (is_int($end)) {
            $end = date('Ymd', $end) . '235959';
            $conditions[] = " AND {$tableAlias}{$field} <= '$end'";
        }

        return implode('', $conditions);
    }
365
366
    /**
     * Run a SQL statement on the connection for the given project, with a maximum statement time.
     * A DriverException is passed to handleDriverError().
     * @param Project|string $project Project instance, database name (i.e. 'enwiki'), or slice (i.e. 's1').
     * @param string $sql
     * @param array $params Parameters to bound to the prepared query.
     * @param int|null $timeout Maximum statement time in seconds. null will use the
     *   default specified by the app.query_timeout config parameter.
     * @return ResultStatement
     * @throws DriverException
     * @throws \Doctrine\DBAL\DBALException
     * @codeCoverageIgnore
     */
    public function executeProjectsQuery(
        $project,
        string $sql,
        array $params = [],
        ?int $timeout = null
    ): ResultStatement {
        // Resolve the timeout before the try block, so it is always set when the catch block runs.
        if (null === $timeout) {
            $timeout = $this->container->getParameter('app.query_timeout');
        }

        try {
            $connection = $this->getProjectsConnection($project);

            return $connection->executeQuery(
                "SET STATEMENT max_statement_time = $timeout FOR\n".$sql,
                $params
            );
        } catch (DriverException $e) {
            $this->handleDriverError($e, $timeout);
        }
    }
393
394
    /**
     * Run the query of a QueryBuilder on its own connection, with a maximum statement time.
     * A DriverException is passed to handleDriverError().
     * @param QueryBuilder $qb
     * @param int|null $timeout Maximum statement time in seconds. null will use the
     *   default specified by the app.query_timeout config parameter.
     * @return ResultStatement
     * @throws HttpException
     * @throws DriverException
     * @codeCoverageIgnore
     */
    public function executeQueryBuilder(QueryBuilder $qb, ?int $timeout = null): ResultStatement
    {
        // Resolve the timeout before the try block, so it is always set when the catch block runs.
        if (null === $timeout) {
            $timeout = $this->container->getParameter('app.query_timeout');
        }

        try {
            $sql = "SET STATEMENT max_statement_time = $timeout FOR\n".$qb->getSQL();
            $connection = $qb->getConnection();

            return $connection->executeQuery($sql, $qb->getParameters(), $qb->getParameterTypes());
        } catch (DriverException $e) {
            $this->handleDriverError($e, $timeout);
        }
    }
414
415
    /**
     * Special handling of some DriverExceptions, otherwise original Exception is thrown.
     * This method never returns normally. The original exception is attached to the
     * HTTP exceptions as the previous exception, so that its details are not lost.
     * @param DriverException $e
     * @param int|null $timeout Timeout value, if applicable. This is passed to the i18n message.
     *   null will use the default specified by the app.query_timeout config parameter.
     * @throws HttpException For error codes 1226 (as a 503), and 1969, 2006 and 2013 (as a 504).
     * @throws DriverException For any other error code.
     * @codeCoverageIgnore
     */
    private function handleDriverError(DriverException $e, ?int $timeout): void
    {
        // If no value was passed for the $timeout, it must be the default.
        if (null === $timeout) {
            $timeout = (int)$this->container->getParameter('app.query_timeout');
        }

        $errorCode = $e->getErrorCode();

        if (1226 === $errorCode) {
            throw new ServiceUnavailableHttpException(30, 'error-service-overload', $e, 503);
        }

        if (in_array($errorCode, [1969, 2006, 2013])) {
            // FIXME: Attempt to reestablish connection on 2006 error (MySQL server has gone away).
            throw new HttpException(504, 'error-query-timeout', $e, [], $timeout);
        }

        throw $e;
    }
439
}
440