Completed
Push — master ( 96df1a...267f86 )
by Naveen
09:52
created

Differ::diff()   F

Complexity

Conditions 19
Paths 2400

Size

Total Lines 67
Code Lines 40

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 67
rs 2.7051
cc 19
eloc 40
nc 2400
nop 3

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the long method into a new, well-named method.

1
<?php
2
/*
3
 * This file is part of the Diff package.
4
 *
5
 * (c) Sebastian Bergmann <[email protected]>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace SebastianBergmann\Diff;
12
13
use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
14
use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
15
use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
16
17
/**
 * Diff implementation.
 *
 * Computes a token-based diff between two strings (split into lines) or two
 * arrays of tokens, and renders it either as an array of tagged tokens or as
 * a string.
 */
class Differ
{
    /**
     * Tag of a token that is present in both inputs.
     */
    const OLD = 0;

    /**
     * Tag of a token that is only present in the "to" input.
     */
    const ADDED = 1;

    /**
     * Tag of a token that is only present in the "from" input.
     */
    const REMOVED = 2;

    /**
     * @var string
     */
    private $header;

    /**
     * @var bool
     */
    private $showNonDiffLines;

    /**
     * @param string $header           Text that is put in front of every rendered diff
     * @param bool   $showNonDiffLines Whether unchanged lines and the "@@ @@" chunk
     *                                 markers are rendered by diff(); only a strict
     *                                 true enables them
     */
    public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
    {
        $this->header           = $header;
        $this->showNonDiffLines = $showNonDiffLines;
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     * The result is the configured header followed by the rendered lines.
     * A run of more than five unchanged lines that is followed by a changed
     * line is collapsed to its last line. Unchanged lines at the very end
     * of the diff are never collapsed, because a run is only recorded once
     * a changed line follows it.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequence $lcs = null)
    {
        $diff = $this->diffToArray($from, $to, $lcs);
        $runs = $this->findCollapsibleRuns($diff);

        return $this->header . $this->renderDiff($diff, $runs);
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => string $token
     *   - [1] => 2|1|0
     *
     * - 2: REMOVED: $token was removed from $from
     * - 1: ADDED: $token was added in $to
     * - 0: OLD: $token is not changed in $to
     *
     * Strings are split into lines at "\r\n", "\r" or "\n". Arrays are
     * re-indexed and used as the list of tokens. Any other value is cast to
     * string. When both inputs are strings with the same number of line
     * breaks but a different sequence of line-break characters, the result
     * starts with an additional OLD entry that carries a warning text.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs Implementation to use; chosen automatically when null
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
    {
        $from = $this->normalizeInput($from);
        $to   = $this->normalizeInput($to);

        // Line endings have to be collected before the strings are split,
        // and only exist for string input.
        $fromLineEndings = $this->extractLineEndings($from);
        $toLineEndings   = $this->extractLineEndings($to);

        $from = $this->splitIntoTokens($from);
        $to   = $this->splitIntoTokens($to);

        $start      = array();
        $end        = array();
        $fromLength = count($from);
        $toLength   = count($to);
        $length     = min($fromLength, $toLength);

        // Strip the common prefix so that the LCS calculation has less to do.
        for ($i = 0; $i < $length; ++$i) {
            if ($from[$i] !== $to[$i]) {
                break;
            }

            $start[] = $from[$i];
            unset($from[$i], $to[$i]);
        }

        $length -= $i;

        // Strip the common suffix, comparing both lists from their ends.
        for ($i = 1; $i < $length; ++$i) {
            if ($from[$fromLength - $i] !== $to[$toLength - $i]) {
                break;
            }

            array_unshift($end, $from[$fromLength - $i]);
            unset($from[$fromLength - $i], $to[$toLength - $i]);
        }

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(array_values($from), array_values($to));
        $diff   = array();

        if (!empty($toLineEndings) &&
            count($fromLineEndings) === count($toLineEndings) &&
            $fromLineEndings !== $toLineEndings) {
            $diff[] = array(
                '#Warning: Strings contain different line endings!', self::OLD
            );
        }

        foreach ($start as $token) {
            $diff[] = array($token, self::OLD);
        }

        foreach ($common as $token) {
            // Everything in front of the next common token was removed ...
            while (reset($from) !== $token) {
                $diff[] = array(array_shift($from), self::REMOVED);
            }

            // ... or added, respectively.
            while (reset($to) !== $token) {
                $diff[] = array(array_shift($to), self::ADDED);
            }

            $diff[] = array($token, self::OLD);

            array_shift($from);
            array_shift($to);
        }

        // foreach instead of "array_shift() !== null" so that a null token
        // does not cut the remaining tokens off.
        foreach ($from as $token) {
            $diff[] = array($token, self::REMOVED);
        }

        foreach ($to as $token) {
            $diff[] = array($token, self::ADDED);
        }

        foreach ($end as $token) {
            $diff[] = array($token, self::OLD);
        }

        return $diff;
    }

    /**
     * Casts everything that is neither an array nor a string to string.
     *
     * @param mixed $input
     *
     * @return array|string
     */
    private function normalizeInput($input)
    {
        if (is_array($input) || is_string($input)) {
            return $input;
        }

        return (string) $input;
    }

    /**
     * Returns the line-break sequences of a string in order of appearance.
     *
     * @param array|string $input
     *
     * @return array Empty for array input
     */
    private function extractLineEndings($input)
    {
        if (!is_string($input)) {
            return array();
        }

        preg_match_all('(\r\n|\r|\n)', $input, $matches);

        return isset($matches[0]) ? $matches[0] : array();
    }

    /**
     * Splits a string into lines; re-indexes an array so that it can be
     * addressed by position.
     *
     * @param array|string $input
     *
     * @return array
     */
    private function splitIntoTokens($input)
    {
        if (is_string($input)) {
            return preg_split('(\r\n|\r|\n)', $input);
        }

        return array_values($input);
    }

    /**
     * Finds the runs of more than five unchanged lines that are followed by
     * a changed line.
     *
     * @param array $diff Result of diffToArray()
     *
     * @return array Index of the first line of a run => index of its last line
     */
    private function findCollapsibleRuns(array $diff)
    {
        $runs     = array();
        $runStart = false;

        foreach ($diff as $index => $line) {
            if ($line[1] === self::OLD) {
                if ($runStart === false) {
                    $runStart = $index;
                }

                continue;
            }

            if ($runStart !== false) {
                if (($index - $runStart) > 5) {
                    $runs[$runStart] = $index - 1;
                }

                $runStart = false;
            }
        }

        return $runs;
    }

    /**
     * Renders the diff lines, jumping to the last line of every collapsible run.
     *
     * @param array $diff Result of diffToArray()
     * @param array $runs Result of findCollapsibleRuns()
     *
     * @return string
     */
    private function renderDiff(array $diff, array $runs)
    {
        $buffer   = '';
        $newChunk = true;
        $end      = count($diff);

        // A collapsible run at the very beginning is skipped up to its last line.
        $start = isset($runs[0]) ? $runs[0] : 0;

        for ($i = $start; $i < $end; $i++) {
            if (isset($runs[$i])) {
                $buffer  .= "\n";
                $newChunk = true;
                $i        = $runs[$i];
            }

            if ($newChunk) {
                if ($this->showNonDiffLines === true) {
                    $buffer .= "@@ @@\n";
                }

                $newChunk = false;
            }

            $buffer .= $this->renderLine($diff[$i]);
        }

        return $buffer;
    }

    /**
     * Renders a single diff entry with its "+", "-" or " " prefix.
     *
     * @param array $line Element of the result of diffToArray()
     *
     * @return string Empty for an unchanged line when those are not shown
     */
    private function renderLine(array $line)
    {
        if ($line[1] === self::ADDED) {
            return '+' . $line[0] . "\n";
        }

        if ($line[1] === self::REMOVED) {
            return '-' . $line[0] . "\n";
        }

        if ($this->showNonDiffLines === true) {
            return ' ' . $line[0] . "\n";
        }

        return '';
    }

    /**
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequence
     */
    private function selectLcsImplementation(array $from, array $to)
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientImplementation;
        }

        return new TimeEfficientImplementation;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (depending on PHP_INT_SIZE) times the
     * square of the length of the shorter input.
     *
     * @param array $from
     * @param array $to
     *
     * @return int|float
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * pow(min(count($from), count($to)), 2);
    }
}
262