1
|
|
|
<?php |
2
|
|
|
/* |
3
|
|
|
* This file is part of the Diff package. |
4
|
|
|
* |
5
|
|
|
* (c) Sebastian Bergmann <sebastian@phpunit.de> |
6
|
|
|
* |
7
|
|
|
* For the full copyright and license information, please view the LICENSE |
8
|
|
|
* file that was distributed with this source code. |
9
|
|
|
*/ |
10
|
|
|
|
11
|
|
|
namespace SebastianBergmann\Diff; |
12
|
|
|
|
13
|
|
|
use SebastianBergmann\Diff\LCS\LongestCommonSubsequence; |
14
|
|
|
use SebastianBergmann\Diff\LCS\TimeEfficientImplementation; |
15
|
|
|
use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation; |
16
|
|
|
|
17
|
|
|
/**
 * Diff implementation.
 *
 * Compares two strings (split into lines) or two token arrays and reports
 * the result either as an array of tagged tokens (diffToArray()) or as a
 * textual diff (diff()).
 */
class Differ
{
    /**
     * Text placed at the very beginning of every string returned by diff().
     *
     * @var string
     */
    private $header;

    /**
     * Whether diff() emits unchanged lines and the "@@ @@" chunk markers.
     *
     * @var bool
     */
    private $showNonDiffLines;

    /**
     * @param string $header           Text prepended to the output of diff()
     * @param bool   $showNonDiffLines When false, diff() omits unchanged lines
     *                                 and the "@@ @@" chunk markers
     */
    public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
    {
        $this->header           = $header;
        $this->showNonDiffLines = $showNonDiffLines;
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Values that are neither array nor string are cast to string first.
     * The result starts with the configured header. A run of more than five
     * unchanged lines that is followed by a change is collapsed: only its
     * last line is rendered, preceded by an empty line and a new chunk
     * marker. A run of unchanged lines at the very end is never collapsed.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  Optional LCS strategy; when null
     *                                       one is selected automatically
     *
     * @return string
     */
    public function diff($from, $to, LongestCommonSubsequence $lcs = null)
    {
        if (!is_array($from) && !is_string($from)) {
            $from = (string) $from;
        }

        if (!is_array($to) && !is_string($to)) {
            $to = (string) $to;
        }

        $buffer = $this->header;
        $diff   = $this->diffToArray($from, $to, $lcs);

        // $old maps "index of the first line of an unchanged run" to "index
        // of the last line of that run" for every run longer than five lines
        // that is terminated by a changed line.
        $inOld = false;
        $i     = 0;
        $old   = array();

        foreach ($diff as $line) {
            if ($line[1] === 0 /* OLD */) {
                if ($inOld === false) {
                    $inOld = $i;
                }
            } elseif ($inOld !== false) {
                if (($i - $inOld) > 5) {
                    $old[$inOld] = $i - 1;
                }

                $inOld = false;
            }

            ++$i;
        }

        // When the diff opens with a collapsed run, start at its last line.
        $start = isset($old[0]) ? $old[0] : 0;
        $end   = count($diff);

        // Strict comparison avoids type juggling; as before, a matching key
        // of 0 is treated the same as "no match".
        $tmp = array_search($end, $old, true);

        if ($tmp !== false && $tmp !== 0) {
            $end = $tmp;
        }

        $newChunk = true;

        for ($i = $start; $i < $end; $i++) {
            if (isset($old[$i])) {
                // Skip the collapsed run: jump to its last line and open a
                // new chunk, separated from the previous one by a blank line.
                $buffer  .= "\n";
                $newChunk = true;
                $i        = $old[$i];
            }

            if ($newChunk) {
                if ($this->showNonDiffLines === true) {
                    $buffer .= "@@ @@\n";
                }

                $newChunk = false;
            }

            if ($diff[$i][1] === 1 /* ADDED */) {
                $buffer .= '+' . $diff[$i][0] . "\n";
            } elseif ($diff[$i][1] === 2 /* REMOVED */) {
                $buffer .= '-' . $diff[$i][0] . "\n";
            } elseif ($this->showNonDiffLines === true) {
                $buffer .= ' ' . $diff[$i][0] . "\n";
            }
        }

        return $buffer;
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => string $token
     *   - [1] => 2|1|0
     *
     * - 2: REMOVED: $token was removed from $from
     * - 1: ADDED: $token was added to $from
     * - 0: OLD: $token is not changed in $to
     *
     * String arguments are split into lines on "\r\n", "\r" or "\n". When
     * both arguments are strings with the same number of line breaks but
     * different line-ending sequences, a warning entry is put first.
     * Array arguments are compared element by element as given.
     *
     * @param array|string             $from
     * @param array|string             $to
     * @param LongestCommonSubsequence $lcs  Optional LCS strategy; when null
     *                                       one is selected automatically
     *
     * @return array
     */
    public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
    {
        $lineEndingPattern = '(\r\n|\r|\n)';

        // Line endings can only be inspected on strings; handing an array to
        // preg_match_all() is an error, so array input keeps empty matches.
        $fromMatches = array();
        $toMatches   = array();

        if (is_string($from)) {
            preg_match_all($lineEndingPattern, $from, $fromMatches);
            $from = preg_split($lineEndingPattern, $from);
        }

        if (is_string($to)) {
            preg_match_all($lineEndingPattern, $to, $toMatches);
            $to = preg_split($lineEndingPattern, $to);
        }

        $start      = array();
        $end        = array();
        $fromLength = count($from);
        $toLength   = count($to);
        $length     = min($fromLength, $toLength);

        // Strip the common prefix so the LCS only sees the differing middle.
        for ($i = 0; $i < $length; ++$i) {
            if ($from[$i] === $to[$i]) {
                $start[] = $from[$i];
                unset($from[$i], $to[$i]);
            } else {
                break;
            }
        }

        $length -= $i;

        // Strip the common suffix, never reaching into the removed prefix.
        for ($i = 1; $i < $length; ++$i) {
            if ($from[$fromLength - $i] === $to[$toLength - $i]) {
                array_unshift($end, $from[$fromLength - $i]);
                unset($from[$fromLength - $i], $to[$toLength - $i]);
            } else {
                break;
            }
        }

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(array_values($from), array_values($to));
        $diff   = array();

        if (isset($fromMatches[0], $toMatches[0]) && $toMatches[0] &&
            count($fromMatches[0]) === count($toMatches[0]) &&
            $fromMatches[0] !== $toMatches[0]) {
            $diff[] = array(
                '#Warning: Strings contain different line endings!', 0
            );
        }

        foreach ($start as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        // Walk the common subsequence: everything in front of the next
        // common token is a removal (from $from) or an addition (from $to).
        foreach ($common as $token) {
            while (reset($from) !== $token) {
                $diff[] = array(array_shift($from), 2 /* REMOVED */);
            }

            while (reset($to) !== $token) {
                $diff[] = array(array_shift($to), 1 /* ADDED */);
            }

            $diff[] = array($token, 0 /* OLD */);

            array_shift($from);
            array_shift($to);
        }

        // Whatever is left after the last common token. Iterating (instead
        // of shifting until null) keeps tokens that are themselves null.
        foreach ($from as $token) {
            $diff[] = array($token, 2 /* REMOVED */);
        }

        foreach ($to as $token) {
            $diff[] = array($token, 1 /* ADDED */);
        }

        foreach ($end as $token) {
            $diff[] = array($token, 0 /* OLD */);
        }

        return $diff;
    }

    /**
     * Picks the LCS implementation based on the estimated memory footprint
     * of the time-efficient one.
     *
     * @param array $from
     * @param array $to
     *
     * @return LongestCommonSubsequence
     */
    private function selectLcsImplementation(array $from, array $to)
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientImplementation;
        }

        return new TimeEfficientImplementation;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size multiplied by the square of the
     * shorter input's length.
     *
     * @param array $from
     * @param array $to
     *
     * @return int|float pow() yields a float once the result exceeds the
     *                   integer range
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        // Per-item size depends on the platform integer width; presumably
        // these are bytes, matching the limit they are compared against.
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * pow(min(count($from), count($to)), 2);
    }
}
262
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.