| Total Complexity | 64 |
| Total Lines | 392 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like AbstractLCS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AbstractLCS, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 13 | abstract class AbstractLCS |
||
| 14 | { |
||
| 15 | /** @const float 10^8, the value of N*M when to start bindnig the run time. */ |
||
| 16 | private const TOO_LONG = 100000000.0; |
||
| 17 | |||
| 18 | /** @const float Limit the time to D^POW_LIMIT */ |
||
| 19 | private const POW_LIMIT = 1.5; |
||
| 20 | |||
| 21 | /** @var int */ |
||
| 22 | private $maxDifferences = 0; |
||
| 23 | |||
| 24 | /** @var int */ |
||
| 25 | private $length = 0; |
||
| 26 | |||
| 27 | /** |
||
| 28 | * @return int |
||
| 29 | */ |
||
| 30 | public function getLength(): int |
||
| 31 | { |
||
| 32 | return $this->length; |
||
| 33 | } |
||
| 34 | |||
| 35 | /** |
||
| 36 | * Myers' algorithm for longest common subsequence. O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M |
||
| 37 | * + N) space (http://citeseer.ist.psu.edu/myers86ond.html) |
||
| 38 | * |
||
| 39 | * Note: Beyond implementing the algorithm as described in the paper I have added diagonal range compression which |
||
| 40 | * helps when finding the LCS of a very long and a very short sequence, also bound the running time to (N + M)^1.5 |
||
| 41 | * when both sequences are very long. |
||
| 42 | * |
||
| 43 | * After this method is called, the longest common subsequence is available by calling getResult() where result[0] |
||
| 44 | * is composed of entries from l1 and result[1] is composed of entries from l2 |
||
| 45 | */ |
||
| 46 | public function longestCommonSubsequence(): void |
||
| 47 | { |
||
| 48 | $length1 = $this->getLength1(); |
||
| 49 | $length2 = $this->getLength2(); |
||
| 50 | |||
| 51 | if (0 === $length1 || 0 === $length2) { |
||
| 52 | $this->length = 0; |
||
| 53 | |||
| 54 | return; |
||
| 55 | } |
||
| 56 | |||
| 57 | $this->maxDifferences = (int)(($length1 + $length2 + 1) / 2); |
||
| 58 | |||
| 59 | if ($length1 * $length2 > self::TOO_LONG) { |
||
| 60 | // Limit complexity to D^POW_LIMIT for long sequences. |
||
| 61 | $this->maxDifferences = (int)\pow($this->maxDifferences, self::POW_LIMIT - 1.0); |
||
| 62 | } |
||
| 63 | |||
| 64 | $this->initializeLcs($length1); |
||
| 65 | |||
| 66 | // The common prefixes and suffixes are always part of some LCS, include them now to reduce our search space. |
||
| 67 | $max = \min($length1, $length2); |
||
| 68 | |||
| 69 | for ( |
||
| 70 | $forwardBound = 0; |
||
| 71 | $forwardBound < $max && $this->isRangeEqual($forwardBound, $forwardBound); |
||
| 72 | $forwardBound++ |
||
| 73 | ) { |
||
| 74 | $this->setLcs($forwardBound, $forwardBound); |
||
| 75 | } |
||
| 76 | |||
| 77 | $backBoundL1 = $length1 - 1; |
||
| 78 | $backBoundL2 = $length2 - 1; |
||
| 79 | |||
| 80 | while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound && |
||
| 81 | $this->isRangeEqual($backBoundL1, $backBoundL2)) { |
||
| 82 | $this->setLcs($backBoundL1, $backBoundL2); |
||
| 83 | $backBoundL1--; |
||
| 84 | $backBoundL2--; |
||
| 85 | } |
||
| 86 | |||
| 87 | $V = \array_fill(0, 2, \array_fill(0, $length1 + $length2 + 1, 0)); |
||
| 88 | $snake = [0, 0, 0]; |
||
| 89 | $lcsRec = $this->lcsRec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, $V, $snake); |
||
| 90 | |||
| 91 | $this->length = $forwardBound + $length1 - $backBoundL1 - 1 + $lcsRec; |
||
| 92 | } |
||
| 93 | |||
| 94 | /** |
||
| 95 | * The recursive helper function for Myers' LCS. Computes the LCS of $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 .. |
||
| 96 | * $topL2] fills in the appropriate location in lcs and returns the length. |
||
| 97 | * |
||
| 98 | * @param int $bottomL1 The first sequence |
||
| 99 | * @param int $topL1 Index in the first sequence to start from (inclusive) |
||
| 100 | * @param int $bottomL2 The second sequence |
||
| 101 | * @param int $topL2 Index in the second sequence to start from (inclusive) |
||
| 102 | * @param array $V Furthest reaching D-paths |
||
| 103 | * @param array $snake Beginning x, y coordinates and the length of the middle snake |
||
| 104 | * @return int Length of the lcs |
||
| 105 | */ |
||
| 106 | private function lcsRec(int $bottomL1, int $topL1, int $bottomL2, int $topL2, array &$V, array &$snake): int |
||
| 107 | { |
||
| 108 | // Check that both sequences are non-empty. |
||
| 109 | if ($bottomL1 > $topL1 || $bottomL2 > $topL2) { |
||
| 110 | return 0; |
||
| 111 | } |
||
| 112 | |||
| 113 | $d = $this->findMiddleSnake($bottomL1, $topL1, $bottomL2, $topL2, $V, $snake); |
||
| 114 | |||
| 115 | // Need to restore these so we don't lose them when they're overwritten by the recursion. |
||
| 116 | $len = $snake[2]; |
||
| 117 | $startX = $snake[0]; |
||
| 118 | $startY = $snake[1]; |
||
| 119 | |||
| 120 | // The middle snake is part of the LCS, store it. |
||
| 121 | for ($i = 0; $i < $len; $i++) { |
||
| 122 | $this->setLcs($startX + $i, $startY + $i); |
||
| 123 | } |
||
| 124 | |||
| 125 | if ($d > 1) { |
||
| 126 | return $len + |
||
| 127 | $this->lcsRec($bottomL1, $startX - 1, $bottomL2, $startY - 1, $V, $snake) + |
||
| 128 | $this->lcsRec($startX + $len, $topL1, $startY + $len, $topL2, $V, $snake); |
||
| 129 | } elseif ($d === 1) { |
||
| 130 | // In this case the sequences differ by exactly 1 line. We have already saved all the lines after the |
||
| 131 | // difference in the for loop above, now we need to save all the lines before the difference. |
||
| 132 | $max = \min($startX - $bottomL1, $startY - $bottomL2); |
||
| 133 | |||
| 134 | for ($i = 0; $i < $max; $i++) { |
||
| 135 | $this->setLcs($bottomL1 + $i, $bottomL2 + $i); |
||
| 136 | } |
||
| 137 | |||
| 138 | return $max + $len; |
||
| 139 | } |
||
| 140 | |||
| 141 | return $len; |
||
| 142 | } |
||
| 143 | |||
| 144 | /** |
||
| 145 | * Helper function for Myers' LCS algorithm to find the middle snake for $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 |
||
| 146 | * .. $topL2] The x, y coordinates of the start of the middle snake are saved in $snake[0], $snake[1] respectively |
||
| 147 | * and the length of the snake is saved in $snake[2]. |
||
| 148 | * |
||
| 149 | * @param int $bottomL1 Index in the first sequence to start from (inclusive) |
||
| 150 | * @param int $topL1 Index in the first sequence to end from (inclusive) |
||
| 151 | * @param int $bottomL2 Index in the second sequence to start from (inclusive) |
||
| 152 | * @param int $topL2 Index in the second sequence to end from (inclusive) |
||
| 153 | * @param int[][] $V Array storing the furthest reaching D-paths for the LCS computation |
||
| 154 | * @param int[] $snake Beginning x, y coordinates and the length of the middle snake |
||
| 155 | * @return int |
||
| 156 | */ |
||
| 157 | private function findMiddleSnake( |
||
| 158 | int $bottomL1, |
||
| 159 | int $topL1, |
||
| 160 | int $bottomL2, |
||
| 161 | int $topL2, |
||
| 162 | array &$V, |
||
| 163 | array &$snake |
||
| 164 | ): int { |
||
| 165 | $N = $topL1 - $bottomL1 + 1; |
||
| 166 | $M = $topL2 - $bottomL2 + 1; |
||
| 167 | |||
| 168 | $delta = $N - $M; |
||
| 169 | $isEven = ($delta & 1) === 1 ? false : true; |
||
| 170 | |||
| 171 | $limit = \min($this->maxDifferences, (int)(($N + $M + 1) / 2)); |
||
| 172 | |||
| 173 | // Offset to make it odd/even. |
||
| 174 | // a 0 or 1 that we add to the start offset to make it odd/even |
||
| 175 | $valueToAddForward = ($M & 1) === 1 ? 1 : 0; |
||
| 176 | $valueToAddBackward = ($N & 1) === 1 ? 1 : 0; |
||
| 177 | |||
| 178 | $startForward = -$M; |
||
| 179 | $endForward = $N; |
||
| 180 | $startBackward = -$N; |
||
| 181 | $endBackward = $M; |
||
| 182 | |||
| 183 | $V[0][$limit + 1] = 0; |
||
| 184 | $V[1][$limit - 1] = $N; |
||
| 185 | |||
| 186 | for ($d = 0; $d <= $limit; $d++) { |
||
| 187 | $startDiag = \max($valueToAddForward + $startForward, -$d); |
||
| 188 | $endDiag = \min($endForward, $d); |
||
| 189 | $valueToAddForward = 1 - $valueToAddForward; |
||
| 190 | |||
| 191 | // Compute forward furthest reaching paths. |
||
| 192 | for ($k = $startDiag; $k <= $endDiag; $k += 2) { |
||
| 193 | if ($k === -$d || ($k < $d && $V[0][$limit + $k - 1] < $V[0][$limit + $k + 1])) { |
||
| 194 | $x = $V[0][$limit + $k + 1]; |
||
| 195 | } else { |
||
| 196 | $x = $V[0][$limit + $k - 1] + 1; |
||
| 197 | } |
||
| 198 | |||
| 199 | $y = $x - $k; |
||
| 200 | |||
| 201 | $snake[0] = $x + $bottomL1; |
||
| 202 | $snake[1] = $y + $bottomL2; |
||
| 203 | $snake[2] = 0; |
||
| 204 | |||
| 205 | while ($x < $N && $y < $M && $this->isRangeEqual($x + $bottomL1, $y + $bottomL2)) { |
||
| 206 | $x++; |
||
| 207 | $y++; |
||
| 208 | $snake[2]++; |
||
| 209 | } |
||
| 210 | |||
| 211 | $V[0][$limit + $k] = $x; |
||
| 212 | |||
| 213 | if (!$isEven && $k >= $delta - $d + 1 && $k <= $delta + $d - 1 && $x >= $V[1][$limit + $k - $delta]) { |
||
| 214 | return 2 * $d - 1; |
||
| 215 | } |
||
| 216 | |||
| 217 | // Check to see if we can cut down the diagonal range. |
||
| 218 | if ($x >= $N && $endForward > $k - 1) { |
||
| 219 | $endForward = $k - 1; |
||
| 220 | } elseif ($y >= $M) { |
||
| 221 | $startForward = $k + 1; |
||
| 222 | $valueToAddForward = 0; |
||
| 223 | } |
||
| 224 | } |
||
| 225 | |||
| 226 | $startDiag = \max($valueToAddBackward + $startBackward, -$d); |
||
| 227 | $endDiag = \min($endBackward, $d); |
||
| 228 | $valueToAddBackward = 1 - $valueToAddBackward; |
||
| 229 | |||
| 230 | // Compute backward furthest reaching paths. |
||
| 231 | for ($k = $startDiag; $k <= $endDiag; $k += 2) { |
||
| 232 | if ($k === $d || ($k !== -$d && $V[1][$limit + $k - 1] < $V[1][$limit + $k + 1])) { |
||
| 233 | $x = $V[1][$limit + $k - 1]; |
||
| 234 | } else { |
||
| 235 | $x = $V[1][$limit + $k + 1] - 1; |
||
| 236 | } |
||
| 237 | |||
| 238 | $y = $x - $k - $delta; |
||
| 239 | $snake[2] = 0; |
||
| 240 | |||
| 241 | while ($x > 0 && $y > 0 && $this->isRangeEqual($x - 1 + $bottomL1, $y - 1 + $bottomL2)) { |
||
| 242 | $x--; |
||
| 243 | $y--; |
||
| 244 | $snake[2]++; |
||
| 245 | } |
||
| 246 | |||
| 247 | $V[1][$limit + $k] = $x; |
||
| 248 | |||
| 249 | if ($isEven && $k >= -$delta - $d && $k <= $d - $delta && $x <= $V[0][$limit + $k + $delta]) { |
||
| 250 | $snake[0] = $bottomL1 + $x; |
||
| 251 | $snake[1] = $bottomL2 + $y; |
||
| 252 | |||
| 253 | return 2 * $d; |
||
| 254 | } |
||
| 255 | |||
| 256 | // Check to see if we can cut down our diagonal range. |
||
| 257 | if ($x <= 0) { |
||
| 258 | $startBackward = $k + 1; |
||
| 259 | $valueToAddBackward = 0; |
||
| 260 | } elseif ($y <= 0 && $endBackward > $k - 1) { |
||
| 261 | $endBackward = $k - 1; |
||
| 262 | } |
||
| 263 | } |
||
| 264 | } |
||
| 265 | |||
| 266 | // Computing the true LCS is too expensive, instead find the diagonal with the most progress and pretend a |
||
| 267 | // middle snake of length 0 occurs there. |
||
| 268 | $mostProgress = $this->findMostProgress($M, $N, $limit, $V); |
||
| 269 | |||
| 270 | $snake[0] = $bottomL1 + $mostProgress[0]; |
||
| 271 | $snake[1] = $bottomL2 + $mostProgress[1]; |
||
| 272 | $snake[2] = 0; |
||
| 273 | |||
| 274 | /* |
||
| 275 | * HACK: Since we didn't really finish the LCS computation we don't really know the length of the SES. We don't |
||
| 276 | * do anything with the result anyway, unless it's <=1. We know for a fact SES > 1 so 5 is as good a number as |
||
| 277 | * any to return here. |
||
| 278 | */ |
||
| 279 | |||
| 280 | return 5; |
||
| 281 | } |
||
| 282 | |||
| 283 | /** |
||
| 284 | * Takes the array with furthest reaching D-paths from an LCS computation and returns the x,y coordinates and |
||
| 285 | * progress made in the middle diagonal among those with maximum progress, both from the front and from the back. |
||
| 286 | * |
||
| 287 | * @param int $M Length of the first sequence for which LCS is being computed |
||
| 288 | * @param int $N Length of the second sequence for which LCS is being computed |
||
| 289 | * @param int $limit Number of steps made in an attempt to find the LCS from the front and back |
||
| 290 | * @param int[][] $V Array storing the furthest reaching D-paths for the LCS computation |
||
| 291 | * |
||
| 292 | * @return int[] Array of 3 integers where $result[0] is the x coordinate of the current location in the diagonal |
||
| 293 | * with the most progress, $result[1] is the y coordinate of the current location in the diagonal with |
||
| 294 | * the most progress and $result[2] is the amount of progress made in that diagonal. |
||
| 295 | */ |
||
| 296 | private function findMostProgress(int $M, int $N, int $limit, array &$V): array |
||
| 376 | } |
||
| 377 | |||
| 378 | /** |
||
| 379 | * @return int |
||
| 380 | */ |
||
| 381 | abstract protected function getLength1(): int; |
||
| 382 | |||
| 383 | /** |
||
| 384 | * @return int |
||
| 385 | */ |
||
| 386 | abstract protected function getLength2(): int; |
||
| 387 | |||
| 388 | /** |
||
| 389 | * @param int $i1 |
||
| 390 | * @param int $i2 |
||
| 391 | * @return bool |
||
| 392 | */ |
||
| 393 | abstract protected function isRangeEqual(int $i1, int $i2): bool; |
||
| 394 | |||
| 395 | /** |
||
| 396 | * @param int $lcsLength |
||
| 397 | */ |
||
| 398 | abstract protected function initializeLcs(int $lcsLength): void; |
||
| 399 | |||
| 400 | /** |
||
| 401 | * @param int $sl1 |
||
| 402 | * @param int $sl2 |
||
| 403 | */ |
||
| 404 | abstract protected function setLcs(int $sl1, int $sl2): void; |
||
| 405 | } |
||
| 406 |