| Total Complexity | 64 |
| Total Lines | 392 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like AbstractLCS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AbstractLCS, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 14 | abstract class AbstractLCS |
||
| 15 | { |
||
| 16 | /** @const float 10^8, the value of N*M when to start bindnig the run time. */ |
||
| 17 | private const TOO_LONG = 100000000.0; |
||
| 18 | |||
| 19 | /** @const float Limit the time to D^POW_LIMIT */ |
||
| 20 | private const POW_LIMIT = 1.5; |
||
| 21 | |||
| 22 | /** @var int */ |
||
| 23 | private $maxDifferences = 0; |
||
| 24 | |||
| 25 | /** @var int */ |
||
| 26 | private $length = 0; |
||
| 27 | |||
| 28 | /** |
||
| 29 | * @return int |
||
| 30 | */ |
||
| 31 | public function getLength(): int |
||
| 32 | { |
||
| 33 | return $this->length; |
||
| 34 | } |
||
| 35 | |||
| 36 | /** |
||
| 37 | * Myers' algorithm for longest common subsequence. O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M |
||
| 38 | * + N) space (http://citeseer.ist.psu.edu/myers86ond.html) |
||
| 39 | * |
||
| 40 | * Note: Beyond implementing the algorithm as described in the paper I have added diagonal range compression which |
||
| 41 | * helps when finding the LCS of a very long and a very short sequence, also bound the running time to (N + M)^1.5 |
||
| 42 | * when both sequences are very long. |
||
| 43 | * |
||
| 44 | * After this method is called, the longest common subsequence is available by calling getResult() where result[0] |
||
| 45 | * is composed of entries from l1 and result[1] is composed of entries from l2 |
||
| 46 | */ |
||
| 47 | public function longestCommonSubsequence(): void |
||
| 48 | { |
||
| 49 | $length1 = $this->getLength1(); |
||
| 50 | $length2 = $this->getLength2(); |
||
| 51 | |||
| 52 | if (0 === $length1 || 0 === $length2) { |
||
| 53 | $this->length = 0; |
||
| 54 | |||
| 55 | return; |
||
| 56 | } |
||
| 57 | |||
| 58 | $this->maxDifferences = (int)(($length1 + $length2 + 1) / 2); |
||
| 59 | |||
| 60 | if ($length1 * $length2 > self::TOO_LONG) { |
||
| 61 | // Limit complexity to D^POW_LIMIT for long sequences. |
||
| 62 | $this->maxDifferences = (int)\pow($this->maxDifferences, self::POW_LIMIT - 1.0); |
||
| 63 | } |
||
| 64 | |||
| 65 | $this->initializeLcs($length1); |
||
| 66 | |||
| 67 | // The common prefixes and suffixes are always part of some LCS, include them now to reduce our search space. |
||
| 68 | $max = \min($length1, $length2); |
||
| 69 | |||
| 70 | for ( |
||
| 71 | $forwardBound = 0; |
||
| 72 | $forwardBound < $max && $this->isRangeEqual($forwardBound, $forwardBound); |
||
| 73 | $forwardBound++ |
||
| 74 | ) { |
||
| 75 | $this->setLcs($forwardBound, $forwardBound); |
||
| 76 | } |
||
| 77 | |||
| 78 | $backBoundL1 = $length1 - 1; |
||
| 79 | $backBoundL2 = $length2 - 1; |
||
| 80 | |||
| 81 | while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound && |
||
| 82 | $this->isRangeEqual($backBoundL1, $backBoundL2)) { |
||
| 83 | $this->setLcs($backBoundL1, $backBoundL2); |
||
| 84 | $backBoundL1--; |
||
| 85 | $backBoundL2--; |
||
| 86 | } |
||
| 87 | |||
| 88 | $V = array_fill(0, 2, array_fill(0, $length1 + $length2 + 1, 0)); |
||
| 89 | $snake = array_fill(0, 3, 0); |
||
| 90 | $lcsRec = $this->lcsRec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, $V, $snake); |
||
| 91 | |||
| 92 | $this->length = $forwardBound + $length1 - $backBoundL1 - 1 + $lcsRec; |
||
| 93 | } |
||
| 94 | |||
| 95 | /** |
||
| 96 | * The recursive helper function for Myers' LCS. Computes the LCS of $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 .. |
||
| 97 | * $topL2] fills in the appropriate location in lcs and returns the length. |
||
| 98 | * |
||
| 99 | * @param int $bottomL1 The first sequence |
||
| 100 | * @param int $topL1 Index in the first sequence to start from (inclusive) |
||
| 101 | * @param int $bottomL2 The second sequence |
||
| 102 | * @param int $topL2 Index in the second sequence to start from (inclusive) |
||
| 103 | * @param array $V Furthest reaching D-paths |
||
| 104 | * @param array $snake Beginning x, y coordinates and the length of the middle snake |
||
| 105 | * @return int Length of the lcs |
||
| 106 | */ |
||
| 107 | private function lcsRec(int $bottomL1, int $topL1, int $bottomL2, int $topL2, array &$V, array &$snake): int |
||
| 108 | { |
||
| 109 | // Check that both sequences are non-empty. |
||
| 110 | if ($bottomL1 > $topL1 || $bottomL2 > $topL2) { |
||
| 111 | return 0; |
||
| 112 | } |
||
| 113 | |||
| 114 | $d = $this->findMiddleSnake($bottomL1, $topL1, $bottomL2, $topL2, $V, $snake); |
||
| 115 | |||
| 116 | // Need to restore these so we don't lose them when they're overwritten by the recursion. |
||
| 117 | $len = $snake[2]; |
||
| 118 | $startX = $snake[0]; |
||
| 119 | $startY = $snake[1]; |
||
| 120 | |||
| 121 | // The middle snake is part of the LCS, store it. |
||
| 122 | for ($i = 0; $i < $len; $i++) { |
||
| 123 | $this->setLcs($startX + $i, $startY + $i); |
||
| 124 | } |
||
| 125 | |||
| 126 | if ($d > 1) { |
||
| 127 | return $len + |
||
| 128 | $this->lcsRec($bottomL1, $startX - 1, $bottomL2, $startY - 1, $V, $snake) + |
||
| 129 | $this->lcsRec($startX + $len, $topL1, $startY + $len, $topL2, $V, $snake); |
||
| 130 | } elseif ($d === 1) { |
||
| 131 | // In this case the sequences differ by exactly 1 line. We have already saved all the lines after the |
||
| 132 | // difference in the for loop above, now we need to save all the lines before the difference. |
||
| 133 | $max = \min($startX - $bottomL1, $startY - $bottomL2); |
||
| 134 | |||
| 135 | for ($i = 0; $i < $max; $i++) { |
||
| 136 | $this->setLcs($bottomL1 + $i, $bottomL2 + $i); |
||
| 137 | } |
||
| 138 | |||
| 139 | return $max + $len; |
||
| 140 | } |
||
| 141 | |||
| 142 | return $len; |
||
| 143 | } |
||
| 144 | |||
| 145 | /** |
||
| 146 | * Helper function for Myers' LCS algorithm to find the middle snake for $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 |
||
| 147 | * .. $topL2] The x, y coordinates of the start of the middle snake are saved in $snake[0], $snake[1] respectively |
||
| 148 | * and the length of the snake is saved in $snake[2]. |
||
| 149 | * |
||
| 150 | * @param int $bottomL1 Index in the first sequence to start from (inclusive) |
||
| 151 | * @param int $topL1 Index in the first sequence to end from (inclusive) |
||
| 152 | * @param int $bottomL2 Index in the second sequence to start from (inclusive) |
||
| 153 | * @param int $topL2 Index in the second sequence to end from (inclusive) |
||
| 154 | * @param int[][] $V Array storing the furthest reaching D-paths for the LCS computation |
||
| 155 | * @param int[] $snake Beginning x, y coordinates and the length of the middle snake |
||
| 156 | * @return int |
||
| 157 | */ |
||
| 158 | private function findMiddleSnake( |
||
| 282 | } |
||
| 283 | |||
| 284 | /** |
||
| 285 | * Takes the array with furthest reaching D-paths from an LCS computation and returns the x,y coordinates and |
||
| 286 | * progress made in the middle diagonal among those with maximum progress, both from the front and from the back. |
||
| 287 | * |
||
| 288 | * @param int $M Length of the first sequence for which LCS is being computed |
||
| 289 | * @param int $N Length of the second sequence for which LCS is being computed |
||
| 290 | * @param int $limit Number of steps made in an attempt to find the LCS from the front and back |
||
| 291 | * @param int[][] $V Array storing the furthest reaching D-paths for the LCS computation |
||
| 292 | * |
||
| 293 | * @return int[] Array of 3 integers where $result[0] is the x coordinate of the current location in the diagonal |
||
| 294 | * with the most progress, $result[1] is the y coordinate of the current location in the diagonal with |
||
| 295 | * the most progress and $result[2] is the amount of progress made in that diagonal. |
||
| 296 | */ |
||
| 297 | private function findMostProgress(int $M, int $N, int $limit, array &$V): array |
||
| 377 | } |
||
| 378 | |||
| 379 | /** |
||
| 380 | * @return int |
||
| 381 | */ |
||
| 382 | abstract protected function getLength1(): int; |
||
| 383 | |||
| 384 | /** |
||
| 385 | * @return int |
||
| 386 | */ |
||
| 387 | abstract protected function getLength2(): int; |
||
| 388 | |||
| 389 | /** |
||
| 390 | * @param int $i1 |
||
| 391 | * @param int $i2 |
||
| 392 | * @return bool |
||
| 393 | */ |
||
| 394 | abstract protected function isRangeEqual(int $i1, int $i2): bool; |
||
| 395 | |||
| 396 | /** |
||
| 397 | * @param int $lcsLength |
||
| 398 | */ |
||
| 399 | abstract protected function initializeLcs(int $lcsLength): void; |
||
| 400 | |||
| 401 | /** |
||
| 402 | * @param int $sl1 |
||
| 403 | * @param int $sl2 |
||
| 404 | */ |
||
| 405 | abstract protected function setLcs(int $sl1, int $sl2): void; |
||
| 406 | } |
||
| 407 |