LCS   F
last analyzed

Complexity

Total Complexity 64

Size/Duplication

Total Lines 386
Duplicated Lines 0 %

Test Coverage

Coverage 60.98%

Importance

Changes 0
Metric Value
eloc 165
dl 0
loc 386
ccs 100
cts 164
cp 0.6098
rs 3.28
c 0
b 0
f 0
wmc 64

5 Methods

Rating   Name   Duplication   Size   Complexity  
A getLength() 0 3 1
B lcsRec() 0 38 7
F findMiddleSnake() 0 119 33
C findMostProgress() 0 79 14
B longestCommonSubsequence() 0 47 9

How to fix   Complexity   

Complex Class

Complex classes like LCS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use LCS, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * (c) Steve Nebes <[email protected]>
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 */
8
9
declare(strict_types=1);
10
11
namespace SN\DaisyDiff\RangeDifferencer\Core;
12
13
/**
14
 * LCS - Longest Common Subsequence - Common Methods.
15
 *
16
 * Used to determine the change set responsible for each line.
17
 */
18
abstract class LCS
19
{
20
    /** @var int */
21
    private $maxDifferences = 0;
22
23
    /** @var int */
24
    private $length = 0;
25
26
    /**
27
     * @return int
28
     */
29 18
    public function getLength(): int
30
    {
31 18
        return $this->length;
32
    }
33
34
    /**
35
     * Myers' algorithm for longest common subsequence. O((M + N)D) worst case time, O(M + N + D^2) expected time,
36
     * O(M + N) space (http://citeseer.ist.psu.edu/myers86ond.html)
37
     *
38
     * Note: Beyond implementing the algorithm as described in the paper I have added diagonal range compression which
39
     * helps when finding the LCS of a very long and a very short sequence, also bound the running time to (N + M)^1.5
40
     * when both sequences are very long.
41
     *
42
     * After this method is called, the longest common subsequence is available by calling getResult() where result[0]
43
     * is composed of entries from l1 and result[1] is composed of entries from l2
44
     *
45
     * @param LCSSettings $settings
46
     * @return void
47
     */
48 11
    public function longestCommonSubsequence(LCSSettings $settings): void
49
    {
50 11
        $length1 = $this->getLength1();
51 11
        $length2 = $this->getLength2();
52
53 11
        if (0 === $length1 || 0 === $length2) {
54
            $this->length = 0;
55
56
            return;
57
        }
58
59 11
        $this->maxDifferences = (int) (($length1 + $length2 + 1) / 2);
60
61 11
        if ((float) $length1 * (float) $length2 > $settings->getTooLong()) {
62
            // Limit complexity to D^POW_LIMIT for long sequences
63
            $this->maxDifferences = (int) (\pow($this->maxDifferences, $settings->getPowLimit() - 1.0));
64
        }
65
66 11
        $this->initializeLcs($length1);
67
68
        // The common prefixes and suffixes are always part of some LCS, include them now to reduce our search space.
69 11
        $max = \min($length1, $length2);
70
71
        for (
72 11
            $forwardBound = 0;
73 11
            $forwardBound < $max && $this->isRangeEqual($forwardBound, $forwardBound);
74
            $forwardBound++
75
        ) {
76 9
            $this->setLcs($forwardBound, $forwardBound);
77
        }
78
79 11
        $backBoundL1 = $length1 - 1;
80 11
        $backBoundL2 = $length2 - 1;
81
82 11
        while ($backBoundL1 >= $forwardBound &&
83 11
            $backBoundL2 >= $forwardBound &&
84 11
            $this->isRangeEqual($backBoundL1, $backBoundL2)) {
85 9
            $this->setLcs($backBoundL1, $backBoundL2);
86 9
            $backBoundL1--;
87 9
            $backBoundL2--;
88
        }
89
90 11
        $V = \array_fill(0, 2, \array_fill(0, $length1 + $length2 + 1, 0));
91 11
        $snake = [0, 0, 0];
92 11
        $lcsRec = $this->lcsRec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, $V, $snake);
93
94 11
        $this->length = $forwardBound + $length1 - $backBoundL1 - 1 + $lcsRec;
95 11
    }
96
97
    /**
98
     * The recursive helper function for Myers' LCS. Computes the LCS of l1[bottoml1 .. topl1] and l2[bottoml2 .. topl2]
99
     * fills in the appropriate location in lcs and returns the length.
100
     *
101
     * @param int     $bottomL1
102
     * @param int     $topL1
103
     * @param int     $bottomL2
104
     * @param int     $topL2
105
     * @param int[][] $V
106
     * @param int[]   $snake
107
     * @return int
108
     */
109 11
    private function lcsRec(int $bottomL1, int $topL1, int $bottomL2, int $topL2, array &$V, array &$snake): int
110
    {
111
        // Check that both sequences are non-empty.
112 11
        if ($bottomL1 > $topL1 || $bottomL2 > $topL2) {
113 11
            return 0;
114
        }
115
116
        /** @var int */
117 7
        $d = $this->findMiddleSnake($bottomL1, $topL1, $bottomL2, $topL2, $V, $snake);
118
119
        // Need to store these so we don't lose them when they're overwritten by the recursion.
120 7
        $len = $snake[2];
121 7
        $startX = $snake[0];
122 7
        $startY = $snake[1];
123
124
        // The middle snake is part of the LCS, store it.
125 7
        for ($i = 0; $i < $len; $i++) {
126 7
            $this->setLcs($startX + $i, $startY + $i);
127
        }
128
129 7
        if ($d > 1) {
130 7
            $lcs1 = $this->lcsRec($bottomL1, $startX - 1, $bottomL2, $startY - 1, $V, $snake);
131 7
            $lcs2 = $this->lcsRec($startX + $len, $topL1, $startY + $len, $topL2, $V, $snake);
132
133 7
            return (int) ($len + $lcs1 + $lcs2);
134
        } elseif ($d === 1) {
135
            // In this case the sequences differ by exactly 1 line. We have already saved all the lines after the
136
            // difference in the for loop above, now we need to save all the lines before the difference.
137
            $max = \min($startX - $bottomL1, $startY - $bottomL2);
138
139
            for ($i = 0; $i < $max; $i++) {
140
                $this->setLcs($bottomL1 + $i, $bottomL2 + $i);
141
            }
142
143
            return $max + $len;
144
        }
145
146
        return $len;
147
    }
148
149
    /**
150
     * Helper function for Myers' LCS algorithm to find the middle snake for l1[bottoml1..topl1] and l2[bottoml2..topl2]
151
     * The x, y coodrdinates of the start of the middle snake are saved in snake[0], snake[1] respectively and the
152
     * length of the snake is saved in s[2].
153
     *
154
     * @param int     $bottomL1
155
     * @param int     $topL1
156
     * @param int     $bottomL2
157
     * @param int     $topL2
158
     * @param int[][] $V
159
     * @param int[]   $snake
160
     * @return int
161
     */
162 7
    private function findMiddleSnake(
163
        int $bottomL1,
164
        int $topL1,
165
        int $bottomL2,
166
        int $topL2,
167
        array &$V,
168
        array &$snake
169
    ): int {
170 7
        $N = $topL1 - $bottomL1 + 1;
171 7
        $M = $topL2 - $bottomL2 + 1;
172
173 7
        $delta = $N - $M;
174 7
        $isEven = ($delta & 1) === 1 ? false : true;
175
176 7
        $limit = \min($this->maxDifferences, (int) (($N + $M + 1) / 2));
177
178
        // Offset to make it odd/even.
179
        // a 0 or 1 that we add to the start offset to make it odd/even
180 7
        $valueToAddForward = ($M & 1) === 1 ? 1 : 0;
181 7
        $valueToAddBackward = ($N & 1) === 1 ? 1 : 0;
182
183 7
        $startForward = -$M;
184 7
        $endForward = $N;
185 7
        $startBackward = -$N;
186 7
        $endBackward = $M;
187
188 7
        $V[0][$limit + 1] = 0;
189 7
        $V[1][$limit - 1] = $N;
190
191 7
        for ($d = 0; $d <= $limit; $d++) {
192 7
            $startDiag = \max($valueToAddForward + $startForward, -$d);
193 7
            $endDiag = \min($endForward, $d);
194 7
            $valueToAddForward = 1 - $valueToAddForward;
195
196
            // Compute forward furthest reaching paths.
197 7
            for ($k = $startDiag; $k <= $endDiag; $k += 2) {
198 7
                if ($k === -$d || ($k < $d && $V[0][$limit + $k - 1] < $V[0][$limit + $k + 1])) {
199 7
                    $x = $V[0][$limit + $k + 1];
200
                } else {
201 7
                    $x = $V[0][$limit + $k - 1] + 1;
202
                }
203
204 7
                $y = $x - $k;
205
206 7
                $snake[0] = $x + $bottomL1;
207 7
                $snake[1] = $y + $bottomL2;
208 7
                $snake[2] = 0;
209
210 7
                while ($x < $N && $y < $M && $this->isRangeEqual($x + $bottomL1, $y + $bottomL2)) {
211 7
                    $x++;
212 7
                    $y++;
213 7
                    $snake[2]++;
214
                }
215
216 7
                $V[0][$limit + $k] = $x;
217
218 7
                if (!$isEven && $k >= $delta - $d + 1 && $k <= $delta + $d - 1 && $x >= $V[1][$limit + $k - $delta]) {
219 7
                    return (int) (2 * $d - 1);
220
                }
221
222
                // Check to see if we can cut down the diagonal range.
223 7
                if ($x >= $N && $endForward > $k - 1) {
224 3
                    $endForward = $k - 1;
225 7
                } elseif ($y >= $M) {
226 5
                    $startForward = $k + 1;
227 5
                    $valueToAddForward = 0;
228
                }
229
            }
230
231 7
            $startDiag = \max($valueToAddBackward + $startBackward, -$d);
232 7
            $endDiag = \min($endBackward, $d);
233 7
            $valueToAddBackward = 1 - $valueToAddBackward;
234
235
            // Compute backward furthest reaching paths.
236 7
            for ($k = $startDiag; $k <= $endDiag; $k += 2) {
237 7
                if ($k === $d || ($k !== -$d && $V[1][$limit + $k - 1] < $V[1][$limit + $k + 1])) {
238 7
                    $x = $V[1][$limit + $k - 1];
239
                } else {
240 7
                    $x = $V[1][$limit + $k + 1] - 1;
241
                }
242
243 7
                $y = $x - $k - $delta;
244 7
                $snake[2] = 0;
245
246 7
                while ($x > 0 && $y > 0 && $this->isRangeEqual($x - 1 + $bottomL1, $y - 1 + $bottomL2)) {
247 7
                    $x--;
248 7
                    $y--;
249 7
                    $snake[2]++;
250
                }
251
252 7
                $V[1][$limit + $k] = $x;
253
254 7
                if ($isEven && $k >= -$delta - $d && $k <= $d - $delta && $x <= $V[0][$limit + $k + $delta]) {
255 5
                    $snake[0] = $bottomL1 + $x;
256 5
                    $snake[1] = $bottomL2 + $y;
257
258 5
                    return (int) (2 * $d);
259
                }
260
261
                // Check to see if we can cut down our diagonal range.
262 7
                if ($x <= 0) {
263 3
                    $startBackward = $k + 1;
264 3
                    $valueToAddBackward = 0;
265 7
                } elseif ($y <= 0 && $endBackward > $k - 1) {
266 5
                    $endBackward = $k - 1;
267
                }
268
            }
269
        }
270
271
        // Computing the true LCS is too expensive, instead find the diagonal with the most progress and pretend a
272
        // middle snake of length 0 occurs there.
273
        /** @var int[] */
274
        $mostProgress = $this->findMostProgress($M, $N, $limit, $V);
275
276
        $snake[0] = $bottomL1 + $mostProgress[0];
277
        $snake[1] = $bottomL2 + $mostProgress[1];
278
        $snake[2] = 0;
279
280
        return 5;
281
282
        // HACK: since we didn't really finish the LCS computation we don't really know the length of the SES. We don't
283
        // do anything with the result anyway, unless it's <=1. We know for a fact SES > 1 so 5 is as good a number as
284
        // any to return here.
285
    }
286
287
    /**
288
     * @param int     $M
289
     * @param int     $N
290
     * @param int     $limit
291
     * @param int[][] $V
292
     * @return int[]
293
     */
294
    private function findMostProgress(int $M, int $N, int $limit, array &$V): array
295
    {
296
        $delta = $N - $M;
297
298
        if (($M & 1) === ($limit & 1)) {
299
            $forwardStartDiag = \max(-$M, -$limit);
300
        } else {
301
            $forwardStartDiag = \max(1 - $M, -$limit);
302
        }
303
304
        $forwardEndDiag = \min($N, $limit);
305
306
        if (($N & 1) === ($limit & 1)) {
307
            $backwardStartDiag = \max(-$N, -$limit);
308
        } else {
309
            $backwardStartDiag = \max(1 - $N, -$limit);
310
        }
311
312
        $backwardEndDiag = \min($M, $limit);
313
        $maxProgress = \array_fill(0,
314
            (int) (\max($forwardEndDiag - $forwardStartDiag, $backwardEndDiag - $backwardStartDiag) / 2 + 1),
315
            [0, 0, 0]);
316
        $numProgress = 0;
317
        // the 1st entry is current, it is initialized with 0s.
318
319
        // First search the forward diagonals.
320
        for ($k = $forwardStartDiag; $k <= $forwardEndDiag; $k += 2) {
321
            $x = $V[0][$limit + $k];
322
            $y = $x - $k;
323
324
            if ($x > $N || $y > $M) {
325
                continue;
326
            }
327
328
            $progress = $x + $y;
329
330
            if ($progress > $maxProgress[0][2]) {
331
                $numProgress = 0;
332
                $maxProgress[0][0] = $x;
333
                $maxProgress[0][1] = $y;
334
                $maxProgress[0][2] = $progress;
335
            } elseif ($progress === $maxProgress[0][2]) {
336
                $numProgress++;
337
                $maxProgress[$numProgress][0] = $x;
338
                $maxProgress[$numProgress][1] = $y;
339
                $maxProgress[$numProgress][2] = $progress;
340
            }
341
        }
342
343
        // Progress is in the forward direction.
344
        $maxProgressForward = true;
345
346
        // Now search the backward diagonals.
347
        for ($k = $backwardStartDiag; $k <= $backwardEndDiag; $k += 2) {
348
            $x = $V[1][$limit + $k];
349
            $y = $x - $k - $delta;
350
351
            if ($x < 0 || $y < 0) {
352
                continue;
353
            }
354
355
            $progress = $N - $x + $M - $y;
356
357
            if ($progress > $maxProgress[0][2]) {
358
                $numProgress = 0;
359
                $maxProgressForward = false;
360
                $maxProgress[0][0] = $x;
361
                $maxProgress[0][1] = $y;
362
                $maxProgress[0][2] = $progress;
363
            } elseif ($progress === $maxProgress[0][2] && !$maxProgressForward) {
364
                $numProgress++;
365
                $maxProgress[$numProgress][0] = $x;
366
                $maxProgress[$numProgress][1] = $y;
367
                $maxProgress[$numProgress][2] = $progress;
368
            }
369
        }
370
371
        // Return the middle diagonal with maximum progress.
372
        return $maxProgress[(int) ($numProgress / 2)];
373
    }
374
375
    /**
376
     * @return int
377
     */
378
    abstract protected function getLength1(): int;
379
380
    /**
381
     * @return int
382
     */
383
    abstract protected function getLength2(): int;
384
385
    /**
386
     * @param int $i1
387
     * @param int $i2
388
     * @return bool
389
     */
390
    abstract protected function isRangeEqual(int $i1, int $i2): bool;
391
392
    /**
393
     * @param int $sl1
394
     * @param int $sl2
395
     * @return void
396
     */
397
    abstract protected function setLcs(int $sl1, int $sl2): void;
398
399
    /**
400
     * @param int $lcsLength
401
     * @return void
402
     */
403
    abstract protected function initializeLcs(int $lcsLength): void;
404
}
405