Total Complexity | 64 |
Total Lines | 392 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like AbstractLCS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AbstractLCS, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
14 | abstract class AbstractLCS |
||
15 | { |
||
16 | /** @const float 10^8, the value of N*M when to start bindnig the run time. */ |
||
17 | private const TOO_LONG = 100000000.0; |
||
18 | |||
19 | /** @const float Limit the time to D^POW_LIMIT */ |
||
20 | private const POW_LIMIT = 1.5; |
||
21 | |||
22 | /** @var int */ |
||
23 | private $maxDifferences = 0; |
||
24 | |||
25 | /** @var int */ |
||
26 | private $length = 0; |
||
27 | |||
28 | /** |
||
29 | * @return int |
||
30 | */ |
||
31 | public function getLength(): int |
||
32 | { |
||
33 | return $this->length; |
||
34 | } |
||
35 | |||
36 | /** |
||
37 | * Myers' algorithm for longest common subsequence. O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M |
||
38 | * + N) space (http://citeseer.ist.psu.edu/myers86ond.html) |
||
39 | * |
||
40 | * Note: Beyond implementing the algorithm as described in the paper I have added diagonal range compression which |
||
41 | * helps when finding the LCS of a very long and a very short sequence, also bound the running time to (N + M)^1.5 |
||
42 | * when both sequences are very long. |
||
43 | * |
||
44 | * After this method is called, the longest common subsequence is available by calling getResult() where result[0] |
||
45 | * is composed of entries from l1 and result[1] is composed of entries from l2 |
||
46 | */ |
||
47 | public function longestCommonSubsequence(): void |
||
48 | { |
||
49 | $length1 = $this->getLength1(); |
||
50 | $length2 = $this->getLength2(); |
||
51 | |||
52 | if (0 === $length1 || 0 === $length2) { |
||
53 | $this->length = 0; |
||
54 | |||
55 | return; |
||
56 | } |
||
57 | |||
58 | $this->maxDifferences = (int)(($length1 + $length2 + 1) / 2); |
||
59 | |||
60 | if ($length1 * $length2 > self::TOO_LONG) { |
||
61 | // Limit complexity to D^POW_LIMIT for long sequences. |
||
62 | $this->maxDifferences = (int)\pow($this->maxDifferences, self::POW_LIMIT - 1.0); |
||
63 | } |
||
64 | |||
65 | $this->initializeLcs($length1); |
||
66 | |||
67 | // The common prefixes and suffixes are always part of some LCS, include them now to reduce our search space. |
||
68 | $max = \min($length1, $length2); |
||
69 | |||
70 | for ( |
||
71 | $forwardBound = 0; |
||
72 | $forwardBound < $max && $this->isRangeEqual($forwardBound, $forwardBound); |
||
73 | $forwardBound++ |
||
74 | ) { |
||
75 | $this->setLcs($forwardBound, $forwardBound); |
||
76 | } |
||
77 | |||
78 | $backBoundL1 = $length1 - 1; |
||
79 | $backBoundL2 = $length2 - 1; |
||
80 | |||
81 | while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound && |
||
82 | $this->isRangeEqual($backBoundL1, $backBoundL2)) { |
||
83 | $this->setLcs($backBoundL1, $backBoundL2); |
||
84 | $backBoundL1--; |
||
85 | $backBoundL2--; |
||
86 | } |
||
87 | |||
88 | $V = array_fill(0, 2, array_fill(0, $length1 + $length2 + 1, 0)); |
||
89 | $snake = array_fill(0, 3, 0); |
||
90 | $lcsRec = $this->lcsRec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, $V, $snake); |
||
91 | |||
92 | $this->length = $forwardBound + $length1 - $backBoundL1 - 1 + $lcsRec; |
||
93 | } |
||
94 | |||
95 | /** |
||
96 | * The recursive helper function for Myers' LCS. Computes the LCS of $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 .. |
||
97 | * $topL2] fills in the appropriate location in lcs and returns the length. |
||
98 | * |
||
99 | * @param int $bottomL1 The first sequence |
||
100 | * @param int $topL1 Index in the first sequence to start from (inclusive) |
||
101 | * @param int $bottomL2 The second sequence |
||
102 | * @param int $topL2 Index in the second sequence to start from (inclusive) |
||
103 | * @param array $V Furthest reaching D-paths |
||
104 | * @param array $snake Beginning x, y coordinates and the length of the middle snake |
||
105 | * @return int Length of the lcs |
||
106 | */ |
||
107 | private function lcsRec(int $bottomL1, int $topL1, int $bottomL2, int $topL2, array &$V, array &$snake): int |
||
108 | { |
||
109 | // Check that both sequences are non-empty. |
||
110 | if ($bottomL1 > $topL1 || $bottomL2 > $topL2) { |
||
111 | return 0; |
||
112 | } |
||
113 | |||
114 | $d = $this->findMiddleSnake($bottomL1, $topL1, $bottomL2, $topL2, $V, $snake); |
||
115 | |||
116 | // Need to restore these so we don't lose them when they're overwritten by the recursion. |
||
117 | $len = $snake[2]; |
||
118 | $startX = $snake[0]; |
||
119 | $startY = $snake[1]; |
||
120 | |||
121 | // The middle snake is part of the LCS, store it. |
||
122 | for ($i = 0; $i < $len; $i++) { |
||
123 | $this->setLcs($startX + $i, $startY + $i); |
||
124 | } |
||
125 | |||
126 | if ($d > 1) { |
||
127 | return $len + |
||
128 | $this->lcsRec($bottomL1, $startX - 1, $bottomL2, $startY - 1, $V, $snake) + |
||
129 | $this->lcsRec($startX + $len, $topL1, $startY + $len, $topL2, $V, $snake); |
||
130 | } elseif ($d === 1) { |
||
131 | // In this case the sequences differ by exactly 1 line. We have already saved all the lines after the |
||
132 | // difference in the for loop above, now we need to save all the lines before the difference. |
||
133 | $max = \min($startX - $bottomL1, $startY - $bottomL2); |
||
134 | |||
135 | for ($i = 0; $i < $max; $i++) { |
||
136 | $this->setLcs($bottomL1 + $i, $bottomL2 + $i); |
||
137 | } |
||
138 | |||
139 | return $max + $len; |
||
140 | } |
||
141 | |||
142 | return $len; |
||
143 | } |
||
144 | |||
145 | /** |
||
146 | * Helper function for Myers' LCS algorithm to find the middle snake for $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 |
||
147 | * .. $topL2] The x, y coordinates of the start of the middle snake are saved in $snake[0], $snake[1] respectively |
||
148 | * and the length of the snake is saved in $snake[2]. |
||
149 | * |
||
150 | * @param int $bottomL1 Index in the first sequence to start from (inclusive) |
||
151 | * @param int $topL1 Index in the first sequence to end from (inclusive) |
||
152 | * @param int $bottomL2 Index in the second sequence to start from (inclusive) |
||
153 | * @param int $topL2 Index in the second sequence to end from (inclusive) |
||
154 | * @param int[][] $V Array storing the furthest reaching D-paths for the LCS computation |
||
155 | * @param int[] $snake Beginning x, y coordinates and the length of the middle snake |
||
156 | * @return int |
||
157 | */ |
||
158 | private function findMiddleSnake( |
||
282 | } |
||
283 | |||
284 | /** |
||
285 | * Takes the array with furthest reaching D-paths from an LCS computation and returns the x,y coordinates and |
||
286 | * progress made in the middle diagonal among those with maximum progress, both from the front and from the back. |
||
287 | * |
||
288 | * @param int $M Length of the first sequence for which LCS is being computed |
||
289 | * @param int $N Length of the second sequence for which LCS is being computed |
||
290 | * @param int $limit Number of steps made in an attempt to find the LCS from the front and back |
||
291 | * @param int[][] $V Array storing the furthest reaching D-paths for the LCS computation |
||
292 | * |
||
293 | * @return int[] Array of 3 integers where $result[0] is the x coordinate of the current location in the diagonal |
||
294 | * with the most progress, $result[1] is the y coordinate of the current location in the diagonal with |
||
295 | * the most progress and $result[2] is the amount of progress made in that diagonal. |
||
296 | */ |
||
297 | private function findMostProgress(int $M, int $N, int $limit, array &$V): array |
||
377 | } |
||
378 | |||
379 | /** |
||
380 | * @return int |
||
381 | */ |
||
382 | abstract protected function getLength1(): int; |
||
383 | |||
384 | /** |
||
385 | * @return int |
||
386 | */ |
||
387 | abstract protected function getLength2(): int; |
||
388 | |||
389 | /** |
||
390 | * @param int $i1 |
||
391 | * @param int $i2 |
||
392 | * @return bool |
||
393 | */ |
||
394 | abstract protected function isRangeEqual(int $i1, int $i2): bool; |
||
395 | |||
396 | /** |
||
397 | * @param int $lcsLength |
||
398 | */ |
||
399 | abstract protected function initializeLcs(int $lcsLength): void; |
||
400 | |||
401 | /** |
||
402 | * @param int $sl1 |
||
403 | * @param int $sl2 |
||
404 | */ |
||
405 | abstract protected function setLcs(int $sl1, int $sl2): void; |
||
406 | } |
||
407 |