Completed
Pull Request — 1.1 (#7)
by David
04:30
created

SchemaAnalyzer::removeDuplicates()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 9
rs 9.6666
c 0
b 0
f 0
cc 2
eloc 5
nc 2
nop 1
1
<?php
2
3
namespace Mouf\Database\SchemaAnalyzer;
4
5
use Doctrine\Common\Cache\Cache;
6
use Doctrine\Common\Cache\VoidCache;
7
use Doctrine\DBAL\Schema\AbstractSchemaManager;
8
use Doctrine\DBAL\Schema\ForeignKeyConstraint;
9
use Doctrine\DBAL\Schema\Schema;
10
use Doctrine\DBAL\Schema\SchemaException;
11
use Doctrine\DBAL\Schema\Table;
12
use Fhaculty\Graph\Edge\Base;
13
use Fhaculty\Graph\Graph;
14
use Fhaculty\Graph\Vertex;
15
16
/**
17
 * This class can analyze a database model.
18
 * In this class you will find.
19
 *
20
 * - Functions to automatically detect **junction tables**
21
 * - Functions to compute the shortest path between 2 tables based on the relationships stored in the schema.
22
 */
23
class SchemaAnalyzer
24
{
25
    private static $WEIGHT_FK = 1;
26
    private static $WEIGHT_INHERITANCE_FK = 0.1;
27
    private static $WEIGHT_JOINTURE_TABLE = 1.5;
28
29
    const WEIGHT_IMPORTANT = 0.75;
30
    const WEIGHT_IRRELEVANT = 2;
31
    const WEIGHT_IGNORE = INF;
32
33
    /**
34
     * @var AbstractSchemaManager
35
     */
36
    private $schemaManager;
37
38
    /**
39
     * @var Schema
40
     */
41
    private $schema;
42
43
    /**
44
     * @var Cache
45
     */
46
    private $cache;
47
48
    /**
49
     * @var string
50
     */
51
    private $cachePrefix;
52
53
    /**
54
     * Nested arrays containing table => column => cost.
55
     *
56
     * @var float[][]
57
     */
58
    private $alteredCosts = [];
59
60
    /**
61
     * Array containing table cost.
62
     *
63
     * @var float[]
64
     */
65
    private $alteredTableCosts = [];
66
67
    /**
68
     * @param AbstractSchemaManager $schemaManager
69
     * @param Cache|null            $cache          The Doctrine cache service to use to cache results (optional)
70
     * @param string|null           $schemaCacheKey The unique identifier for the schema manager. Compulsory if cache is set.
71
     */
72
    public function __construct(AbstractSchemaManager $schemaManager, Cache $cache = null, $schemaCacheKey = null)
73
    {
74
        $this->schemaManager = $schemaManager;
75
        if (empty($schemaCacheKey) && $cache) {
76
            throw new SchemaAnalyzerException('You must provide a schema cache key if you configure SchemaAnalyzer with cache support.');
77
        }
78
        if ($cache) {
79
            $this->cache = $cache;
80
        } else {
81
            $this->cache = new VoidCache();
82
        }
83
        $this->cachePrefix = $schemaCacheKey;
84
    }
85
86
    /**
87
     * Detect all junctions tables in the schema.
88
     * A table is a junction table if:.
89
     *
90
     * - it has exactly 2 foreign keys
91
     * - it has only 2 columns (or 3 columns if the third one is an autoincremented primary key).
92
     *
93
     * If $ignoreReferencedTables is true, junctions table that are pointed to by a foreign key of another
94
     * table are ignored.
95
     *
96
     * @param bool $ignoreReferencedTables
97
     *
98
     * @return Table[]
99
     */
100
    public function detectJunctionTables($ignoreReferencedTables = false)
101
    {
102
        $junctionTablesKey = $this->cachePrefix.'_junctiontables_'.($ignoreReferencedTables ? 'true' : 'false');
103
        $junctionTables = $this->cache->fetch($junctionTablesKey);
104
        if ($junctionTables === false) {
105
            $junctionTables = array_filter($this->getSchema()->getTables(), function (Table $table) use ($ignoreReferencedTables) {
106
                return $this->isJunctionTable($table, $ignoreReferencedTables);
107
            });
108
            $this->cache->save($junctionTablesKey, $junctionTables);
109
        }
110
111
        return $junctionTables;
112
    }
113
114
    /**
115
     * Returns true if $table is a junction table.
116
     * I.e:.
117
     *
118
     * - it must have exactly 2 foreign keys
119
     * - it must have only 2 columns (or 3 columns if the third one is an autoincremented primary key).
120
     *
121
     * If $ignoreReferencedTables is true, junctions table that are pointed to by a foreign key of another
122
     * table are ignored.
123
     *
124
     * @param Table $table
125
     * @param bool  $ignoreReferencedTables
126
     *
127
     * @return bool
128
     */
129
    private function isJunctionTable(Table $table, $ignoreReferencedTables = false)
130
    {
131
        $foreignKeys = $table->getForeignKeys();
132
        if (count($foreignKeys) !== 2) {
133
            return false;
134
        }
135
136
        $columns = $table->getColumns();
137
        if (count($columns) < 2 || count($columns) > 3) {
138
            return false;
139
        }
140
141
        if ($table->hasPrimaryKey()) {
142
            $pkColumns = $table->getPrimaryKeyColumns();
143
        } else {
144
            $pkColumns = [];
145
        }
146
147
        if (count($pkColumns) === 1 && count($columns) === 2) {
148
            return false;
149
        }
150
151
        if (count($pkColumns) !== 1 && count($columns) === 3) {
152
            return false;
153
        }
154
155
        $fkColumnNames = [];
156
        foreach ($foreignKeys as $foreignKey) {
157
            $fkColumns = $foreignKey->getColumns();
158
            if (count($fkColumns) !== 1) {
159
                return false;
160
            }
161
            $fkColumnNames[$fkColumns[0]] = true;
162
        }
163
164
        if (count($columns) === 3) {
165
            // Let's check that the third column (the ID is NOT a foreign key)
166
            if (isset($fkColumnNames[$pkColumns[0]])) {
167
                return false;
168
            }
169
170
            // Let's check that the primary key is autoincremented
171
            if (!$table->getColumn($pkColumns[0])->getAutoincrement()) {
172
                return false;
173
            }
174
        }
175
176
        if ($ignoreReferencedTables && $this->isTableReferenced($table)) {
177
            return false;
178
        }
179
180
        return true;
181
    }
182
183
    /**
184
     * Returns true if the table $table is referenced by another table.
185
     *
186
     * @param Table $table
187
     *
188
     * @return bool
189
     */
190
    private function isTableReferenced(Table $table)
191
    {
192
        $tableName = $table->getName();
193
        foreach ($this->getSchema()->getTables() as $tableIter) {
194
            foreach ($tableIter->getForeignKeys() as $fk) {
195
                if ($fk->getForeignTableName() === $tableName) {
196
                    return true;
197
                }
198
            }
199
        }
200
201
        return false;
202
    }
203
204
    /**
205
     * Get the shortest path between 2 tables.
206
     *
207
     * @param string $fromTable
208
     * @param string $toTable
209
     *
210
     * @return \Doctrine\DBAL\Schema\ForeignKeyConstraint[]
211
     *
212
     * @throws SchemaAnalyzerException
213
     */
214
    public function getShortestPath($fromTable, $toTable)
215
    {
216
        return $this->fromCache($this->cachePrefix.'_shortest_'.$fromTable.'```'.$toTable, function () use ($fromTable, $toTable) {
217
            return $this->getShortestPathWithoutCache($fromTable, $toTable);
218
        });
219
    }
220
221
    /**
222
     * Get the shortest path between 2 tables.
223
     *
224
     * @param string $fromTable
225
     * @param string $toTable
226
     *
227
     * @return \Doctrine\DBAL\Schema\ForeignKeyConstraint[]
228
     *
229
     * @throws SchemaAnalyzerException
230
     */
231
    private function getShortestPathWithoutCache($fromTable, $toTable)
232
    {
233
        $this->checkTableExists($fromTable);
234
        $this->checkTableExists($toTable);
235
236
        $graph = $this->buildSchemaGraph();
237
238
        try {
239
            $predecessors = MultiDijkstra::findShortestPaths($graph->getVertex($fromTable), $graph->getVertex($toTable));
240
            $edges = MultiDijkstra::getCheapestPathFromPredecesArray($graph->getVertex($fromTable), $graph->getVertex($toTable), $predecessors);
241
        } catch (MultiDijkstraAmbiguityException $e) {
242
            // If there is more than 1 short path, let's display this.
243
            $paths = MultiDijkstra::getAllPossiblePathsFromPredecesArray($graph->getVertex($fromTable), $graph->getVertex($toTable), $predecessors);
244
            $msg = $this->getAmbiguityExceptionMessage($paths, $graph->getVertex($fromTable), $graph->getVertex($toTable));
245
            throw new ShortestPathAmbiguityException($msg);
246
        }
247
248
        $foreignKeys = [];
249
250
        $currentTable = $fromTable;
251
252
        foreach ($edges as $edge) {
253
            /* @var $edge Base */
254
255
            if ($fk = $edge->getAttribute('fk')) {
256
                /* @var $fk ForeignKeyConstraint */
257
                $foreignKeys[] = $fk;
258
                if ($fk->getForeignTableName() == $currentTable) {
259
                    $currentTable = $fk->getLocalTable()->getName();
260
                } else {
261
                    $currentTable = $fk->getForeignTableName();
262
                }
263
            } elseif ($junctionTable = $edge->getAttribute('junction')) {
264
                /* @var $junctionTable Table */
265
                $junctionFks = array_values($junctionTable->getForeignKeys());
266
                // We need to order the 2 FKs. The first one is the one that has a common point with the current table.
267
                $fk = $junctionFks[0];
268
                if ($fk->getForeignTableName() == $currentTable) {
269
                    $foreignKeys[] = $fk;
270
                    $foreignKeys[] = $junctionFks[1];
271
                } else {
272
                    $foreignKeys[] = $junctionFks[1];
273
                    $foreignKeys[] = $fk;
274
                }
275
            } else {
276
                // @codeCoverageIgnoreStart
277
                throw new SchemaAnalyzerException('Unexpected edge. We should have a fk or a junction attribute.');
278
                // @codeCoverageIgnoreEnd
279
            }
280
        }
281
282
        return $foreignKeys;
283
    }
284
285
    private function checkTableExists($tableName)
286
    {
287
        try {
288
            $this->getSchema()->getTable($tableName);
289
        } catch (SchemaException $e) {
290
            throw SchemaAnalyzerTableNotFoundException::tableNotFound($tableName, $this->schema, $e);
291
        }
292
    }
293
294
    private function buildSchemaGraph()
295
    {
296
        $graph = new Graph();
297
298
        // First, let's create all the vertex
299
        foreach ($this->getSchema()->getTables() as $table) {
300
            $graph->createVertex($table->getName());
301
        }
302
303
        // Then, let's create all the edges
304
        foreach ($this->getSchema()->getTables() as $table) {
305
            $fks = $this->removeDuplicates($table->getForeignKeys());
306
            foreach ($fks as $fk) {
307
                // Create an undirected edge, with weight = 1
308
                $edge = $graph->getVertex($table->getName())->createEdge($graph->getVertex($fk->getForeignTableName()));
309
                if (isset($this->alteredCosts[$fk->getLocalTable()->getName()][implode(',', $fk->getLocalColumns())])) {
310
                    $cost = $this->alteredCosts[$fk->getLocalTable()->getName()][implode(',', $fk->getLocalColumns())];
311
                } elseif ($this->isInheritanceRelationship($fk)) {
312
                    $cost = self::$WEIGHT_INHERITANCE_FK;
313
                } else {
314
                    $cost = self::$WEIGHT_FK;
315
                }
316
                if (isset($this->alteredTableCosts[$fk->getLocalTable()->getName()])) {
317
                    $cost *= $this->alteredTableCosts[$fk->getLocalTable()->getName()];
318
                }
319
320
                $edge->setWeight($cost);
321
                $edge->getAttributeBag()->setAttribute('fk', $fk);
322
            }
323
        }
324
325
        // Finally, let's add virtual edges for the junction tables
326
        foreach ($this->detectJunctionTables() as $junctionTable) {
327
            $tables = [];
328
            foreach ($junctionTable->getForeignKeys() as $fk) {
329
                $tables[] = $fk->getForeignTableName();
330
            }
331
332
            $edge = $graph->getVertex($tables[0])->createEdge($graph->getVertex($tables[1]));
333
            $cost = self::$WEIGHT_JOINTURE_TABLE;
334
            if (isset($this->alteredTableCosts[$junctionTable->getName()])) {
335
                $cost *= $this->alteredTableCosts[$junctionTable->getName()];
336
            }
337
            $edge->setWeight($cost);
338
            $edge->getAttributeBag()->setAttribute('junction', $junctionTable);
339
        }
340
341
        return $graph;
342
    }
343
344
    /**
345
     * Remove duplicate foreign keys (assumes that all foreign yes are from the same local table).
346
     *
347
     * @param ForeignKeyConstraint[] $foreignKeys
348
     * @return ForeignKeyConstraint[]
349
     */
350
    private function removeDuplicates(array $foreignKeys): array
351
    {
352
        $fks = [];
353
        foreach ($foreignKeys as $foreignKey) {
354
            $fks[implode('__`__', $foreignKey->getLocalColumns())] = $foreignKey;
355
        }
356
357
        return array_values($fks);
358
    }
359
360
    /**
361
     * Returns the schema (from the schema manager or the cache if needed).
362
     *
363
     * @return Schema
364
     */
365
    private function getSchema()
366
    {
367
        if ($this->schema === null) {
368
            $schemaKey = $this->cachePrefix.'_schema';
369
            $this->schema = $this->cache->fetch($schemaKey);
370
            if (empty($this->schema)) {
371
                $this->schema = $this->schemaManager->createSchema();
372
                $this->cache->save($schemaKey, $this->schema);
373
            }
374
        }
375
376
        return $this->schema;
377
    }
378
379
    /**
380
     * Returns the full exception message when an ambiguity arises.
381
     *
382
     * @param Base[][] $paths
383
     * @param Vertex   $startVertex
384
     */
385
    private function getAmbiguityExceptionMessage(array $paths, Vertex $startVertex, Vertex $endVertex)
386
    {
387
        $textPaths = [];
388
        $i = 1;
389
        foreach ($paths as $path) {
390
            $textPaths[] = 'Path '.$i.': '.$this->getTextualPath($path, $startVertex);
391
            ++$i;
392
        }
393
394
        $msg = sprintf("There are many possible shortest paths between table '%s' and table '%s'\n\n",
395
            $startVertex->getId(), $endVertex->getId());
396
397
        $msg .= implode("\n\n", $textPaths);
398
399
        return $msg;
400
    }
401
402
    /**
403
     * Returns the textual representation of the path.
404
     *
405
     * @param Base[] $path
406
     * @param Vertex $startVertex
407
     */
408
    private function getTextualPath(array $path, Vertex $startVertex)
409
    {
410
        $currentVertex = $startVertex;
411
        $currentTable = $currentVertex->getId();
412
413
        $textPath = $currentTable;
414
415
        foreach ($path as $edge) {
416
            /* @var $fk ForeignKeyConstraint */
417
            if ($fk = $edge->getAttribute('fk')) {
418
                if ($fk->getForeignTableName() == $currentTable) {
419
                    $currentTable = $fk->getLocalTable()->getName();
420
                    $isForward = false;
421
                } else {
422
                    $currentTable = $fk->getForeignTableName();
423
                    $isForward = true;
424
                }
425
426
                $columns = implode(',', $fk->getLocalColumns());
427
428
                $textPath .= ' '.(!$isForward ? '<' : '');
429
                $textPath .= '--('.$columns.')--';
430
                $textPath .= ($isForward ? '>' : '').' ';
431
                $textPath .= $currentTable;
432
            } elseif ($junctionTable = $edge->getAttribute('junction')) {
433
                /* @var $junctionTable Table */
434
                $junctionFks = array_values($junctionTable->getForeignKeys());
435
                // We need to order the 2 FKs. The first one is the one that has a common point with the current table.
436
                $fk = $junctionFks[0];
437
                if ($fk->getForeignTableName() == $currentTable) {
438
                    $currentTable = $junctionFks[1]->getForeignTableName();
439
                } else {
440
                    $currentTable = $fk->getForeignTableName();
441
                }
442
                $textPath .= ' <=('.$junctionTable->getName().')=> '.$currentTable;
443
            } else {
444
                // @codeCoverageIgnoreStart
445
                throw new SchemaAnalyzerException('Unexpected edge. We should have a fk or a junction attribute.');
446
                // @codeCoverageIgnoreEnd
447
            }
448
        }
449
450
        return $textPath;
451
    }
452
453
    /**
454
     * Sets the cost of a foreign key.
455
     *
456
     * @param string $tableName
457
     * @param string $columnName
458
     * @param float  $cost
459
     *
460
     * @return $this
461
     */
462
    public function setForeignKeyCost($tableName, $columnName, $cost)
463
    {
464
        $this->alteredCosts[$tableName][$columnName] = $cost;
465
    }
466
467
    /**
468
     * Sets the cost modifier of a table.
469
     *
470
     * @param string $tableName
471
     * @param float  $cost
472
     *
473
     * @return $this
474
     */
475
    public function setTableCostModifier($tableName, $cost)
476
    {
477
        $this->alteredTableCosts[$tableName] = $cost;
478
    }
479
480
    /**
481
     * Sets the cost modifier of all tables at once.
482
     *
483
     * @param array<string, float> $tableCosts The key is the table name, the value is the cost modifier.
484
     */
485
    public function setTableCostModifiers(array $tableCosts)
486
    {
487
        $this->alteredTableCosts = $tableCosts;
488
    }
489
490
    /**
491
     * Sets the cost of all foreign keys at once.
492
     *
493
     * @param array<string, array<string, float>> $fkCosts First key is the table name, second key is the column name, the value is the cost.
494
     */
495
    public function setForeignKeyCosts(array $fkCosts)
496
    {
497
        $this->alteredCosts = $fkCosts;
498
    }
499
500
    /**
501
     * Returns true if this foreign key represents an inheritance relationship,
502
     * i.e. if this foreign key is based on a primary key.
503
     *
504
     * @param ForeignKeyConstraint $fk
505
     *
506
     * @return true
507
     */
508
    private function isInheritanceRelationship(ForeignKeyConstraint $fk)
509
    {
510
        if (!$fk->getLocalTable()->hasPrimaryKey()) {
511
            return false;
512
        }
513
        $fkColumnNames = $fk->getLocalColumns();
514
        $pkColumnNames = $fk->getLocalTable()->getPrimaryKeyColumns();
515
516
        sort($fkColumnNames);
517
        sort($pkColumnNames);
518
519
        return $fkColumnNames == $pkColumnNames;
520
    }
521
522
    /**
523
     * If this table is pointing to a parent table (if its primary key is a foreign key pointing on another table),
524
     * this function will return the pointed table.
525
     * This function will return null if there is no parent table.
526
     *
527
     * @param string $tableName
528
     *
529
     * @return ForeignKeyConstraint|null
530
     */
531
    public function getParentRelationship($tableName)
532
    {
533
        return $this->fromCache($this->cachePrefix.'_parent_'.$tableName, function () use ($tableName) {
534
            return $this->getParentRelationshipWithoutCache($tableName);
535
        });
536
    }
537
538
    /**
539
     * If this table is pointing to a parent table (if its primary key is a foreign key pointing on another table),
540
     * this function will return the pointed table.
541
     * This function will return null if there is no parent table.
542
     *
543
     * @param string $tableName
544
     *
545
     * @return ForeignKeyConstraint|null
546
     */
547
    private function getParentRelationshipWithoutCache($tableName)
548
    {
549
        $table = $this->getSchema()->getTable($tableName);
550
        foreach ($table->getForeignKeys() as $fk) {
551
            if ($this->isInheritanceRelationship($fk)) {
552
                return $fk;
553
            }
554
        }
555
556
        return;
557
    }
558
559
    /**
560
     * If this table is pointed by children tables (if other child tables have a primary key that is also a
561
     * foreign key to this table), this function will return the list of child tables.
562
     * This function will return an empty array if there are no children tables.
563
     *
564
     * @param string $tableName
565
     *
566
     * @return ForeignKeyConstraint[]
567
     */
568
    public function getChildrenRelationships($tableName)
569
    {
570
        return $this->fromCache($this->cachePrefix.'_children_'.$tableName, function () use ($tableName) {
571
            return $this->getChildrenRelationshipsWithoutCache($tableName);
572
        });
573
    }
574
575
    /**
576
     * If this table is pointed by children tables (if other child tables have a primary key that is also a
577
     * foreign key to this table), this function will return the list of child tables.
578
     * This function will return an empty array if there are no children tables.
579
     *
580
     * @param string $tableName
581
     *
582
     * @return ForeignKeyConstraint[]
583
     */
584
    private function getChildrenRelationshipsWithoutCache($tableName)
585
    {
586
        $schema = $this->getSchema();
587
        $children = [];
588
        foreach ($schema->getTables() as $table) {
589
            if ($table->getName() === $tableName) {
590
                continue;
591
            }
592
            $fks = $this->removeDuplicates($table->getForeignKeys());
593
            foreach ($table->getForeignKeys($fks) as $fk) {
0 ignored issues
show
Unused Code introduced by
The call to Table::getForeignKeys() has too many arguments starting with $fks.

This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.

If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is Wordpress.

In this case you can add the @ignore PhpDoc annotation to the duplicate definition and it will be ignored.

Loading history...
594
                if ($fk->getForeignTableName() === $tableName && $this->isInheritanceRelationship($fk)) {
595
                    $children[] = $fk;
596
                }
597
            }
598
        }
599
600
        return $children;
601
    }
602
603
    /**
604
     * Returns an item from cache or computes it using $closure and puts it in cache.
605
     *
606
     * @param string   $key
607
     * @param callable $closure
608
     *
609
     * @return mixed
610
     */
611
    private function fromCache($key, callable $closure)
612
    {
613
        $item = $this->cache->fetch($key);
614
        if ($item === false) {
615
            $item = $closure();
616
            $this->cache->save($key, $item);
617
        }
618
619
        return $item;
620
    }
621
}
622