Completed
Push — master ( 96cbf3...9bcdfc )
by Steve
03:03 queued 01:23
created

AbstractLCS   F

Complexity

Total Complexity 64

Size/Duplication

Total Lines 392
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 166
dl 0
loc 392
rs 3.28
c 0
b 0
f 0
wmc 64

5 Methods

Rating   Name   Duplication   Size   Complexity  
F findMiddleSnake() 0 124 33
B lcsRec() 0 36 7
A getLength() 0 3 1
C findMostProgress() 0 80 14
B longestCommonSubsequence() 0 46 9

How to fix   Complexity   

Complex Class

Complex classes like AbstractLCS often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use AbstractLCS, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * (c) Steve Nebes <[email protected]>
5
 *
6
 * For the full copyright and license information, please view the LICENSE
7
 * file that was distributed with this source code.
8
 */
9
10
declare(strict_types=1);
11
12
namespace SN\RangeDifferencer\Core;
13
14
abstract class AbstractLCS
15
{
16
    /** @const float 10^8, the value of N*M when to start bindnig the run time. */
17
    private const TOO_LONG = 100000000.0;
18
19
    /** @const float Limit the time to D^POW_LIMIT */
20
    private const POW_LIMIT = 1.5;
21
22
    /** @var int */
23
    private $maxDifferences = 0;
24
25
    /** @var int */
26
    private $length = 0;
27
28
    /**
29
     * @return int
30
     */
31
    public function getLength(): int
32
    {
33
        return $this->length;
34
    }
35
36
    /**
37
     * Myers' algorithm for longest common subsequence. O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M
38
     * + N) space (http://citeseer.ist.psu.edu/myers86ond.html)
39
     *
40
     * Note: Beyond implementing the algorithm as described in the paper I have added diagonal range compression which
41
     * helps when finding the LCS of a very long and a very short sequence, also bound the running time to (N + M)^1.5
42
     * when both sequences are very long.
43
     *
44
     * After this method is called, the longest common subsequence is available by calling getResult() where result[0]
45
     * is composed of entries from l1 and result[1] is composed of entries from l2
46
     */
47
    public function longestCommonSubsequence(): void
48
    {
49
        $length1 = $this->getLength1();
50
        $length2 = $this->getLength2();
51
52
        if (0 === $length1 || 0 === $length2) {
53
            $this->length = 0;
54
55
            return;
56
        }
57
58
        $this->maxDifferences = (int)(($length1 + $length2 + 1) / 2);
59
60
        if ($length1 * $length2 > self::TOO_LONG) {
61
            // Limit complexity to D^POW_LIMIT for long sequences.
62
            $this->maxDifferences = (int)\pow($this->maxDifferences, self::POW_LIMIT - 1.0);
63
        }
64
65
        $this->initializeLcs($length1);
66
67
        // The common prefixes and suffixes are always part of some LCS, include them now to reduce our search space.
68
        $max = \min($length1, $length2);
69
70
        for (
71
            $forwardBound = 0;
72
            $forwardBound < $max && $this->isRangeEqual($forwardBound, $forwardBound);
73
            $forwardBound++
74
        ) {
75
            $this->setLcs($forwardBound, $forwardBound);
76
        }
77
78
        $backBoundL1 = $length1 - 1;
79
        $backBoundL2 = $length2 - 1;
80
81
        while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound &&
82
            $this->isRangeEqual($backBoundL1, $backBoundL2)) {
83
            $this->setLcs($backBoundL1, $backBoundL2);
84
            $backBoundL1--;
85
            $backBoundL2--;
86
        }
87
88
        $V = array_fill(0, 2, array_fill(0, $length1 + $length2 + 1, 0));
89
        $snake = array_fill(0, 3, 0);
90
        $lcsRec = $this->lcsRec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2, $V, $snake);
91
92
        $this->length = $forwardBound + $length1 - $backBoundL1 - 1 + $lcsRec;
93
    }
94
95
    /**
96
     * The recursive helper function for Myers' LCS. Computes the LCS of $l1[$bottomL1 .. $topL1] and $l2[$bottomL2 ..
97
     * $topL2] fills in the appropriate location in lcs and returns the length.
98
     *
99
     * @param int   $bottomL1 The first sequence
100
     * @param int   $topL1    Index in the first sequence to start from (inclusive)
101
     * @param int   $bottomL2 The second sequence
102
     * @param int   $topL2    Index in the second sequence to start from (inclusive)
103
     * @param array $V        Furthest reaching D-paths
104
     * @param array $snake    Beginning x, y coordinates and the length of the middle snake
105
     * @return int Length of the lcs
106
     */
107
    private function lcsRec(int $bottomL1, int $topL1, int $bottomL2, int $topL2, array &$V, array &$snake): int
108
    {
109
        // Check that both sequences are non-empty.
110
        if ($bottomL1 > $topL1 || $bottomL2 > $topL2) {
111
            return 0;
112
        }
113
114
        $d = $this->findMiddleSnake($bottomL1, $topL1, $bottomL2, $topL2, $V, $snake);
115
116
        // Need to restore these so we don't lose them when they're overwritten by the recursion.
117
        $len = $snake[2];
118
        $startX = $snake[0];
119
        $startY = $snake[1];
120
121
        // The middle snake is part of the LCS, store it.
122
        for ($i = 0; $i < $len; $i++) {
123
            $this->setLcs($startX + $i, $startY + $i);
124
        }
125
126
        if ($d > 1) {
127
            return $len +
128
                $this->lcsRec($bottomL1, $startX - 1, $bottomL2, $startY - 1, $V, $snake) +
129
                $this->lcsRec($startX + $len, $topL1, $startY + $len, $topL2, $V, $snake);
130
        } elseif ($d === 1) {
131
            // In this case the sequences differ by exactly 1 line. We have already saved all the lines after the
132
            // difference in the for loop above, now we need to save all the lines before the difference.
133
            $max = \min($startX - $bottomL1, $startY - $bottomL2);
134
135
            for ($i = 0; $i < $max; $i++) {
136
                $this->setLcs($bottomL1 + $i, $bottomL2 + $i);
137
            }
138
139
            return $max + $len;
140
        }
141
142
        return $len;
143
    }
144
145
    /**
146
     * Helper function for Myers' LCS algorithm to find the middle snake for $l1[$bottomL1 .. $topL1] and $l2[$bottomL2
147
     * .. $topL2] The x, y coordinates of the start of the middle snake are saved in $snake[0], $snake[1] respectively
148
     * and the length of the snake is saved in $snake[2].
149
     *
150
     * @param int     $bottomL1 Index in the first sequence to start from (inclusive)
151
     * @param int     $topL1    Index in the first sequence to end from (inclusive)
152
     * @param int     $bottomL2 Index in the second sequence to start from (inclusive)
153
     * @param int     $topL2    Index in the second sequence to end from (inclusive)
154
     * @param int[][] $V        Array storing the furthest reaching D-paths for the LCS computation
155
     * @param int[]   $snake    Beginning x, y coordinates and the length of the middle snake
156
     * @return int
157
     */
158
    private function findMiddleSnake(
159
        int $bottomL1,
160
        int $topL1,
161
        int $bottomL2,
162
        int $topL2,
163
        array &$V,
164
        array &$snake
165
    ): int {
166
        $N = $topL1 - $bottomL1 + 1;
167
        $M = $topL2 - $bottomL2 + 1;
168
169
        $delta = $N - $M;
170
        $isEven = ($delta & 1) === 1 ? false : true;
171
172
        $limit = \min($this->maxDifferences, (int)(($N + $M + 1) / 2));
173
174
        // Offset to make it odd/even.
175
        // a 0 or 1 that we add to the start offset to make it odd/even
176
        $valueToAddForward = ($M & 1) === 1 ? 1 : 0;
177
        $valueToAddBackward = ($N & 1) === 1 ? 1 : 0;
178
179
        $startForward = -$M;
180
        $endForward = $N;
181
        $startBackward = -$N;
182
        $endBackward = $M;
183
184
        $V[0][$limit + 1] = 0;
185
        $V[1][$limit - 1] = $N;
186
187
        for ($d = 0; $d <= $limit; $d++) {
188
            $startDiag = \max($valueToAddForward + $startForward, -$d);
189
            $endDiag = \min($endForward, $d);
190
            $valueToAddForward = 1 - $valueToAddForward;
191
192
            // Compute forward furthest reaching paths.
193
            for ($k = $startDiag; $k <= $endDiag; $k += 2) {
194
                if ($k === -$d || ($k < $d && $V[0][$limit + $k - 1] < $V[0][$limit + $k + 1])) {
195
                    $x = $V[0][$limit + $k + 1];
196
                } else {
197
                    $x = $V[0][$limit + $k - 1] + 1;
198
                }
199
200
                $y = $x - $k;
201
202
                $snake[0] = $x + $bottomL1;
203
                $snake[1] = $y + $bottomL2;
204
                $snake[2] = 0;
205
206
                while ($x < $N && $y < $M && $this->isRangeEqual($x + $bottomL1, $y + $bottomL2)) {
207
                    $x++;
208
                    $y++;
209
                    $snake[2]++;
210
                }
211
212
                $V[0][$limit + $k] = $x;
213
214
                if (!$isEven && $k >= $delta - $d + 1 && $k <= $delta + $d - 1 && $x >= $V[1][$limit + $k - $delta]) {
215
                    return 2 * $d - 1;
216
                }
217
218
                // Check to see if we can cut down the diagonal range.
219
                if ($x >= $N && $endForward > $k - 1) {
220
                    $endForward = $k - 1;
221
                } elseif ($y >= $M) {
222
                    $startForward = $k + 1;
223
                    $valueToAddForward = 0;
224
                }
225
            }
226
227
            $startDiag = \max($valueToAddBackward + $startBackward, -$d);
228
            $endDiag = \min($endBackward, $d);
229
            $valueToAddBackward = 1 - $valueToAddBackward;
230
231
            // Compute backward furthest reaching paths.
232
            for ($k = $startDiag; $k <= $endDiag; $k += 2) {
233
                if ($k === $d || ($k !== -$d && $V[1][$limit + $k - 1] < $V[1][$limit + $k + 1])) {
234
                    $x = $V[1][$limit + $k - 1];
235
                } else {
236
                    $x = $V[1][$limit + $k + 1] - 1;
237
                }
238
239
                $y = $x - $k - $delta;
240
                $snake[2] = 0;
241
242
                while ($x > 0 && $y > 0 && $this->isRangeEqual($x - 1 + $bottomL1, $y - 1 + $bottomL2)) {
243
                    $x--;
244
                    $y--;
245
                    $snake[2]++;
246
                }
247
248
                $V[1][$limit + $k] = $x;
249
250
                if ($isEven && $k >= -$delta - $d && $k <= $d - $delta && $x <= $V[0][$limit + $k + $delta]) {
251
                    $snake[0] = $bottomL1 + $x;
252
                    $snake[1] = $bottomL2 + $y;
253
254
                    return 2 * $d;
255
                }
256
257
                // Check to see if we can cut down our diagonal range.
258
                if ($x <= 0) {
259
                    $startBackward = $k + 1;
260
                    $valueToAddBackward = 0;
261
                } elseif ($y <= 0 && $endBackward > $k - 1) {
262
                    $endBackward = $k - 1;
263
                }
264
            }
265
        }
266
267
        // Computing the true LCS is too expensive, instead find the diagonal with the most progress and pretend a
268
        // middle snake of length 0 occurs there.
269
        $mostProgress = $this->findMostProgress($M, $N, $limit, $V);
270
271
        $snake[0] = $bottomL1 + $mostProgress[0];
272
        $snake[1] = $bottomL2 + $mostProgress[1];
273
        $snake[2] = 0;
274
275
        /*
276
         * HACK: Since we didn't really finish the LCS computation we don't really know the length of the SES. We don't
277
         * do anything with the result anyway, unless it's <=1. We know for a fact SES > 1 so 5 is as good a number as
278
         *  any to return here.
279
         */
280
281
        return 5;
282
    }
283
284
    /**
285
     * Takes the array with furthest reaching D-paths from an LCS computation and returns the x,y coordinates and
286
     * progress made in the middle diagonal among those with maximum progress, both from the front and from the back.
287
     *
288
     * @param int     $M     Length of the first sequence for which LCS is being computed
289
     * @param int     $N     Length of the second sequence for which LCS is being computed
290
     * @param int     $limit Number of steps made in an attempt to find the LCS from the front and back
291
     * @param int[][] $V     Array storing the furthest reaching D-paths for the LCS computation
292
     *
293
     * @return int[] Array of 3 integers where $result[0] is the x coordinate of the current location in the diagonal
294
     *               with the most progress, $result[1] is the y coordinate of the current location in the diagonal with
295
     *               the most progress and $result[2] is the amount of progress made in that diagonal.
296
     */
297
    private function findMostProgress(int $M, int $N, int $limit, array &$V): array
298
    {
299
        $delta = $N - $M;
300
301
        if (($M & 1) === ($limit & 1)) {
302
            $forwardStartDiag = \max(-$M, -$limit);
303
        } else {
304
            $forwardStartDiag = \max(1 - $M, -$limit);
305
        }
306
307
        $forwardEndDiag = \min($N, $limit);
308
309
        if (($N & 1) === ($limit & 1)) {
310
            $backwardStartDiag = \max(-$N, -$limit);
311
        } else {
312
            $backwardStartDiag = \max(1 - $N, -$limit);
313
        }
314
315
        $backwardEndDiag = \min($M, $limit);
316
317
        $maxProgress = array_fill(
318
            0,
319
            (int)(\max($forwardEndDiag - $forwardStartDiag, $backwardEndDiag - $backwardStartDiag) / 2 + 1),
320
            [0, 0, 0]);
321
        // The first entry is current, it is initialized with 0s.
322
        $numProgress = 0;
323
324
        // First search the forward diagonals.
325
        for ($k = $forwardStartDiag; $k <= $forwardEndDiag; $k += 2) {
326
            $x = $V[0][$limit + $k];
327
            $y = $x - $k;
328
329
            if ($x > $N || $y > $M) {
330
                continue;
331
            }
332
333
            $progress = $x + $y;
334
335
            if ($progress > $maxProgress[0][2]) {
336
                $numProgress = 0;
337
                $maxProgress[0][0] = $x;
338
                $maxProgress[0][1] = $y;
339
                $maxProgress[0][2] = $progress;
340
            } elseif ($progress === $maxProgress[0][2]) {
341
                $numProgress++;
342
                $maxProgress[$numProgress][0] = $x;
343
                $maxProgress[$numProgress][1] = $y;
344
                $maxProgress[$numProgress][2] = $progress;
345
            }
346
        }
347
348
        // Initially the maximum progress is in the forward direction.
349
        $maxProgressForward = true;
350
351
        // Now search the backward diagonals.
352
        for ($k = $backwardStartDiag; $k <= $backwardEndDiag; $k += 2) {
353
            $x = $V[1][$limit + $k];
354
            $y = $x - $k - $delta;
355
356
            if ($x < 0 || $y < 0) {
357
                continue;
358
            }
359
360
            $progress = $N - $x + $M - $y;
361
362
            if ($progress > $maxProgress[0][2]) {
363
                $numProgress = 0;
364
                $maxProgressForward = false;
365
                $maxProgress[0][0] = $x;
366
                $maxProgress[0][1] = $y;
367
                $maxProgress[0][2] = $progress;
368
            } elseif ($progress === $maxProgress[0][2] && !$maxProgressForward) {
369
                $numProgress++;
370
                $maxProgress[$numProgress][0] = $x;
371
                $maxProgress[$numProgress][1] = $y;
372
                $maxProgress[$numProgress][2] = $progress;
373
            }
374
        }
375
376
        return $maxProgress[(int)($numProgress / 2)];
377
    }
378
379
    /**
380
     * @return int
381
     */
382
    abstract protected function getLength1(): int;
383
384
    /**
385
     * @return int
386
     */
387
    abstract protected function getLength2(): int;
388
389
    /**
390
     * @param int $i1
391
     * @param int $i2
392
     * @return bool
393
     */
394
    abstract protected function isRangeEqual(int $i1, int $i2): bool;
395
396
    /**
397
     * @param int $lcsLength
398
     */
399
    abstract protected function initializeLcs(int $lcsLength): void;
400
401
    /**
402
     * @param int $sl1
403
     * @param int $sl2
404
     */
405
    abstract protected function setLcs(int $sl1, int $sl2): void;
406
}
407