LCS   F
last analyzed

Complexity

Total Complexity 64

Size/Duplication

Total Lines 389
Duplicated Lines 0 %

Test Coverage

Coverage 90.12%

Importance

Changes 0
Metric Value
eloc 164
dl 0
loc 389
ccs 146
cts 162
cp 0.9012
rs 3.28
c 0
b 0
f 0
wmc 64

5 Methods

Rating   Name   Duplication   Size   Complexity  
B longestCommonSubsequence() 0 43 9
A getLength() 0 3 1
F findMiddleSnake() 0 124 33
B lcsRec() 0 36 7
C findMostProgress() 0 80 14

How to fix   Complexity   

Complex Class

Complex classes like LCS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use LCS, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * (c) Steve Nebes <[email protected]>
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 */
8
9
declare(strict_types=1);
10
11
namespace SN\RangeDifferencer\Core;
12
13
/**
14
 * Myers' algorithm for longest common subsequence.
15
 */
16
abstract class LCS
17
{
18
    /** @const float 10^8, the value of N*M when to start binding the run time. */
19
    private const TOO_LONG = 100000000.0;
20
21
    /** @const float Limit the time to D^POW_LIMIT */
22
    private const POW_LIMIT = 1.5;
23
24
    /** @var int */
25
    private $maxDifferences = 0;
26
27
    /** @var int */
28
    private $length = 0;
29
30
    /**
31
     * @return int
32
     */
33 33
    public function getLength(): int
34
    {
35 33
        return $this->length;
36
    }
37
38
    /**
39
     * Myers' algorithm for longest common subsequence. O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M
40
     * + N) space (http://citeseer.ist.psu.edu/myers86ond.html)
41
     *
42
     * Note: Beyond implementing the algorithm as described in the paper I have added diagonal range compression which
43
     * helps when finding the LCS of a very long and a very short sequence, also bound the running time to (N + M)^1.5
44
     * when both sequences are very long.
45
     *
46
     * After this method is called, the longest common subsequence is available by calling getResult() where result[0]
47
     * is composed of entries from l1 and result[1] is composed of entries from l2
48
     */
49 23
    public function longestCommonSubsequence(): void
50
    {
51 23
        $length1 = $this->getLength1();
52 23
        $length2 = $this->getLength2();
53
54 23
        if (0 === $length1 || 0 === $length2) {
55
            $this->length = 0;
56
57
            return;
58
        }
59
60 23
        $this->maxDifferences = (int) (($length1 + $length2 + 1) / 2);
61
62 23
        if ($length1 * $length2 > self::TOO_LONG) {
63
            // Limit complexity to D^POW_LIMIT for long sequences.
64
            $this->maxDifferences = (int) \pow($this->maxDifferences, self::POW_LIMIT - 1.0);
65
        }
66
67 23
        $this->initializeLcs($length1);
68
69
        // The common prefixes and suffixes are always part of some LCS, include them now to reduce our search space.
70 23
        $max = \min($length1, $length2);
71
72 23
        for ($forwardBound = 0;
73 23
             $forwardBound < $max && $this->isRangeEqual($forwardBound, $forwardBound); $forwardBound++) {
74 19
            $this->setLcs($forwardBound, $forwardBound);
75
        }
76
77 23
        $backBoundL1 = $length1 - 1;
78 23
        $backBoundL2 = $length2 - 1;
79
80 23
        while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound &&
81 23
            $this->isRangeEqual($backBoundL1, $backBoundL2)) {
82 18
            $this->setLcs($backBoundL1, $backBoundL2);
83 18
            $backBoundL1--;
84 18
            $backBoundL2--;
85
        }
86
87 23
        $V = \array_fill(0, 2, \array_fill(0, $length1 + $length2 + 1, 0));
88 23
        $snake = [0, 0, 0];
89 23
        $lcsRec = $this->lcsRec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, $V, $snake);
90
91 23
        $this->length = $forwardBound + $length1 - $backBoundL1 - 1 + $lcsRec;
92 23
    }
93
94
    /**
95
     * The recursive helper function for Myers' LCS. Computes the LCS of $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 ..
96
     * $topL2] fills in the appropriate location in lcs and returns the length.
97
     *
98
     * @param int   $bottomL1 The first sequence
99
     * @param int   $topL1    Index in the first sequence to start from (inclusive)
100
     * @param int   $bottomL2 The second sequence
101
     * @param int   $topL2    Index in the second sequence to start from (inclusive)
102
     * @param array $V        Furthest reaching D-paths
103
     * @param array $snake    Beginning x, y coordinates and the length of the middle snake
104
     * @return int Length of the lcs
105
     */
106 23
    private function lcsRec(int $bottomL1, int $topL1, int $bottomL2, int $topL2, array &$V, array &$snake): int
107
    {
108
        // Check that both sequences are non-empty.
109 23
        if ($bottomL1 > $topL1 || $bottomL2 > $topL2) {
110 23
            return (int) 0;
111
        }
112
113 4
        $d = $this->findMiddleSnake($bottomL1, $topL1, $bottomL2, $topL2, $V, $snake);
114
115
        // Need to restore these so we don't lose them when they're overwritten by the recursion.
116 4
        $len = $snake[2];
117 4
        $startX = $snake[0];
118 4
        $startY = $snake[1];
119
120
        // The middle snake is part of the LCS, store it.
121 4
        for ($i = 0; $i < $len; $i++) {
122 4
            $this->setLcs($startX + $i, $startY + $i);
123
        }
124
125 4
        if ($d > 1) {
126
            return (int) ($len +
127 4
                $this->lcsRec($bottomL1, $startX - 1, $bottomL2, $startY - 1, $V, $snake) +
128 4
                $this->lcsRec($startX + $len, $topL1, $startY + $len, $topL2, $V, $snake));
129
        } elseif ($d === 1) {
130
            // In this case the sequences differ by exactly 1 line. We have already saved all the lines after the
131
            // difference in the for loop above, now we need to save all the lines before the difference.
132
            $max = \min($startX - $bottomL1, $startY - $bottomL2);
133
134
            for ($i = 0; $i < $max; $i++) {
135
                $this->setLcs($bottomL1 + $i, $bottomL2 + $i);
136
            }
137
138
            return (int) ($max + $len);
139
        }
140
141
        return (int) $len;
142
    }
143
144
    /**
145
     * Helper function for Myers' LCS algorithm to find the middle snake for $l1[$bottomL1 .. $topL1] and $l2[$bottomL2
146
     * .. $topL2] The x, y coordinates of the start of the middle snake are saved in $snake[0], $snake[1] respectively
147
     * and the length of the snake is saved in $snake[2].
148
     *
149
     * @param int     $bottomL1 Index in the first sequence to start from (inclusive)
150
     * @param int     $topL1    Index in the first sequence to end from (inclusive)
151
     * @param int     $bottomL2 Index in the second sequence to start from (inclusive)
152
     * @param int     $topL2    Index in the second sequence to end from (inclusive)
153
     * @param int[][] $V        Array storing the furthest reaching D-paths for the LCS computation
154
     * @param int[]   $snake    Beginning x, y coordinates and the length of the middle snake
155
     * @return int
156
     */
157 4
    private function findMiddleSnake(
158
        int $bottomL1,
159
        int $topL1,
160
        int $bottomL2,
161
        int $topL2,
162
        array &$V,
163
        array &$snake
164
    ): int {
165 4
        $N = $topL1 - $bottomL1 + 1;
166 4
        $M = $topL2 - $bottomL2 + 1;
167
168 4
        $delta = $N - $M;
169 4
        $isEven = ($delta & 1) === 1 ? false : true;
170
171 4
        $limit = \min($this->maxDifferences, (int) (($N + $M + 1) / 2));
172
173
        // Offset to make it odd/even.
174
        // a 0 or 1 that we add to the start offset to make it odd/even
175 4
        $valueToAddForward = ($M & 1) === 1 ? 1 : 0;
176 4
        $valueToAddBackward = ($N & 1) === 1 ? 1 : 0;
177
178 4
        $startForward = -$M;
179 4
        $endForward = $N;
180 4
        $startBackward = -$N;
181 4
        $endBackward = $M;
182
183 4
        $V[0][$limit + 1] = 0;
184 4
        $V[1][$limit - 1] = $N;
185
186 4
        for ($d = 0; $d <= $limit; $d++) {
187 4
            $startDiag = \max($valueToAddForward + $startForward, -$d);
188 4
            $endDiag = \min($endForward, $d);
189 4
            $valueToAddForward = 1 - $valueToAddForward;
190
191
            // Compute forward furthest reaching paths.
192 4
            for ($k = $startDiag; $k <= $endDiag; $k += 2) {
193 4
                if ($k === -$d || ($k < $d && $V[0][$limit + $k - 1] < $V[0][$limit + $k + 1])) {
194 4
                    $x = $V[0][$limit + $k + 1];
195
                } else {
196 4
                    $x = $V[0][$limit + $k - 1] + 1;
197
                }
198
199 4
                $y = $x - $k;
200
201 4
                $snake[0] = $x + $bottomL1;
202 4
                $snake[1] = $y + $bottomL2;
203 4
                $snake[2] = 0;
204
205 4
                while ($x < $N && $y < $M && $this->isRangeEqual($x + $bottomL1, $y + $bottomL2)) {
206 4
                    $x++;
207 4
                    $y++;
208 4
                    $snake[2]++;
209
                }
210
211 4
                $V[0][$limit + $k] = $x;
212
213 4
                if (!$isEven && $k >= $delta - $d + 1 && $k <= $delta + $d - 1 && $x >= $V[1][$limit + $k - $delta]) {
214 4
                    return (int) (2 * $d - 1);
215
                }
216
217
                // Check to see if we can cut down the diagonal range.
218 4
                if ($x >= $N && $endForward > $k - 1) {
219 1
                    $endForward = $k - 1;
220 4
                } elseif ($y >= $M) {
221 3
                    $startForward = $k + 1;
222 3
                    $valueToAddForward = 0;
223
                }
224
            }
225
226 4
            $startDiag = \max($valueToAddBackward + $startBackward, -$d);
227 4
            $endDiag = \min($endBackward, $d);
228 4
            $valueToAddBackward = 1 - $valueToAddBackward;
229
230
            // Compute backward furthest reaching paths.
231 4
            for ($k = $startDiag; $k <= $endDiag; $k += 2) {
232 4
                if ($k === $d || ($k !== -$d && $V[1][$limit + $k - 1] < $V[1][$limit + $k + 1])) {
233 4
                    $x = $V[1][$limit + $k - 1];
234
                } else {
235 4
                    $x = $V[1][$limit + $k + 1] - 1;
236
                }
237
238 4
                $y = $x - $k - $delta;
239 4
                $snake[2] = 0;
240
241 4
                while ($x > 0 && $y > 0 && $this->isRangeEqual($x - 1 + $bottomL1, $y - 1 + $bottomL2)) {
242 4
                    $x--;
243 4
                    $y--;
244 4
                    $snake[2]++;
245
                }
246
247 4
                $V[1][$limit + $k] = $x;
248
249 4
                if ($isEven && $k >= -$delta - $d && $k <= $d - $delta && $x <= $V[0][$limit + $k + $delta]) {
250 3
                    $snake[0] = $bottomL1 + $x;
251 3
                    $snake[1] = $bottomL2 + $y;
252
253 3
                    return (int) (2 * $d);
254
                }
255
256
                // Check to see if we can cut down our diagonal range.
257 4
                if ($x <= 0) {
258 1
                    $startBackward = $k + 1;
259 1
                    $valueToAddBackward = 0;
260 4
                } elseif ($y <= 0 && $endBackward > $k - 1) {
261 3
                    $endBackward = $k - 1;
262
                }
263
            }
264
        }
265
266
        // Computing the true LCS is too expensive, instead find the diagonal with the most progress and pretend a
267
        // middle snake of length 0 occurs there.
268
        $mostProgress = $this->findMostProgress($M, $N, $limit, $V);
269
270
        $snake[0] = $bottomL1 + $mostProgress[0];
271
        $snake[1] = $bottomL2 + $mostProgress[1];
272
        $snake[2] = 0;
273
274
        /*
275
         * HACK: Since we didn't really finish the LCS computation we don't really know the length of the SES. We don't
276
         * do anything with the result anyway, unless it's <=1. We know for a fact SES > 1 so 5 is as good a number as
277
         * any to return here.
278
         */
279
280
        return 5;
281
    }
282
283
    /**
284
     * Takes the array with furthest reaching D-paths from an LCS computation and returns the x,y coordinates and
285
     * progress made in the middle diagonal among those with maximum progress, both from the front and from the back.
286
     *
287
     * @param int     $M     Length of the first sequence for which LCS is being computed
288
     * @param int     $N     Length of the second sequence for which LCS is being computed
289
     * @param int     $limit Number of steps made in an attempt to find the LCS from the front and back
290
     * @param int[][] $V     Array storing the furthest reaching D-paths for the LCS computation
291
     *
292
     * @return int[] Array of 3 integers where $result[0] is the x coordinate of the current location in the diagonal
293
     *               with the most progress, $result[1] is the y coordinate of the current location in the diagonal with
294
     *               the most progress and $result[2] is the amount of progress made in that diagonal.
295
     */
296 1
    private function findMostProgress(int $M, int $N, int $limit, array $V): array
297
    {
298 1
        $delta = $N - $M;
299
300 1
        if (($M & 1) === ($limit & 1)) {
301 1
            $forwardStartDiag = \max(-$M, -$limit);
302
        } else {
303 1
            $forwardStartDiag = \max(1 - $M, -$limit);
304
        }
305
306 1
        $forwardEndDiag = \min($N, $limit);
307
308 1
        if (($N & 1) === ($limit & 1)) {
309 1
            $backwardStartDiag = \max(-$N, -$limit);
310
        } else {
311 1
            $backwardStartDiag = \max(1 - $N, -$limit);
312
        }
313
314 1
        $backwardEndDiag = \min($M, $limit);
315
316 1
        $maxProgress = array_fill(
317 1
            0,
318 1
            (int) (\max($forwardEndDiag - $forwardStartDiag, $backwardEndDiag - $backwardStartDiag) / 2 + 1),
319 1
            [0, 0, 0]);
320
        // The first entry is current, it is initialized with 0s.
321 1
        $numProgress = 0;
322
323
        // First search the forward diagonals.
324 1
        for ($k = $forwardStartDiag; $k <= $forwardEndDiag; $k += 2) {
325 1
            $x = $V[0][$limit + $k];
326 1
            $y = $x - $k;
327
328 1
            if ($x > $N || $y > $M) {
329
                continue;
330
            }
331
332 1
            $progress = $x + $y;
333
334 1
            if ($progress > $maxProgress[0][2]) {
335 1
                $numProgress = 0;
336 1
                $maxProgress[0][0] = $x;
337 1
                $maxProgress[0][1] = $y;
338 1
                $maxProgress[0][2] = $progress;
339 1
            } elseif ($progress === $maxProgress[0][2]) {
340 1
                $numProgress++;
341 1
                $maxProgress[$numProgress][0] = $x;
342 1
                $maxProgress[$numProgress][1] = $y;
343 1
                $maxProgress[$numProgress][2] = $progress;
344
            }
345
        }
346
347
        // Initially the maximum progress is in the forward direction.
348 1
        $maxProgressForward = true;
349
350
        // Now search the backward diagonals.
351 1
        for ($k = $backwardStartDiag; $k <= $backwardEndDiag; $k += 2) {
352 1
            $x = $V[1][$limit + $k];
353 1
            $y = $x - $k - $delta;
354
355 1
            if ($x < 0 || $y < 0) {
356
                continue;
357
            }
358
359 1
            $progress = $N - $x + $M - $y;
360
361 1
            if ($progress > $maxProgress[0][2]) {
362 1
                $numProgress = 0;
363 1
                $maxProgressForward = false;
364 1
                $maxProgress[0][0] = $x;
365 1
                $maxProgress[0][1] = $y;
366 1
                $maxProgress[0][2] = $progress;
367 1
            } elseif ($progress === $maxProgress[0][2] && !$maxProgressForward) {
368 1
                $numProgress++;
369 1
                $maxProgress[$numProgress][0] = $x;
370 1
                $maxProgress[$numProgress][1] = $y;
371 1
                $maxProgress[$numProgress][2] = $progress;
372
            }
373
        }
374
375 1
        return $maxProgress[(int) ($numProgress / 2)];
376
    }
377
378
    /**
379
     * @return int
380
     */
381
    abstract protected function getLength1(): int;
382
383
    /**
384
     * @return int
385
     */
386
    abstract protected function getLength2(): int;
387
388
    /**
389
     * @param int $i1
390
     * @param int $i2
391
     * @return bool
392
     */
393
    abstract protected function isRangeEqual(int $i1, int $i2): bool;
394
395
    /**
396
     * @param int $lcsLength
397
     */
398
    abstract protected function initializeLcs(int $lcsLength): void;
399
400
    /**
401
     * @param int $sl1
402
     * @param int $sl2
403
     */
404
    abstract protected function setLcs(int $sl1, int $sl2): void;
405
}
406