1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Text_Diff |
4
|
|
|
* |
5
|
|
|
* General API for generating and formatting diffs - the differences between |
6
|
|
|
* two sequences of strings. |
7
|
|
|
* |
8
|
|
|
* The PHP diff code used in this package was originally written by Geoffrey |
9
|
|
|
* T. Dairiki and is used with his permission. |
10
|
|
|
* |
11
|
|
|
* $Horde: framework/Text_Diff/Diff.php,v 1.13 2005/05/26 20:26:06 selsky Exp $ |
12
|
|
|
* |
13
|
|
|
* @package Text_Diff |
14
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
15
|
|
|
*/ |
16
|
|
|
class Text_Diff |
17
|
|
|
{ |
18
|
|
|
|
19
|
|
|
/** |
20
|
|
|
* Array of changes. |
21
|
|
|
* |
22
|
|
|
* @var array |
23
|
|
|
*/ |
24
|
|
|
public $_edits; |
25
|
|
|
|
26
|
|
|
/** |
27
|
|
|
* Computes diffs between sequences of strings. |
28
|
|
|
* |
29
|
|
|
* @param array $from_lines An array of strings. Typically these are |
30
|
|
|
* lines from a file. |
31
|
|
|
* @param array $to_lines An array of strings. |
32
|
|
|
*/ |
33
|
|
|
//HACK by domifara |
|
|
|
|
34
|
|
|
// function Text_Diff($from_lines, $to_lines) |
35
|
|
|
public function __construct($from_lines, $to_lines) |
36
|
|
|
{ |
37
|
|
|
array_walk($from_lines, array($this, '_trimNewlines')); |
38
|
|
|
array_walk($to_lines, array($this, '_trimNewlines')); |
39
|
|
|
|
40
|
|
|
if (extension_loaded('xdiff')) { |
41
|
|
|
$engine = new Text_Diff_Engine_xdiff(); |
42
|
|
|
} else { |
43
|
|
|
$engine = new Text_Diff_Engine_native(); |
44
|
|
|
} |
45
|
|
|
|
46
|
|
|
$this->_edits = $engine->diff($from_lines, $to_lines); |
47
|
|
|
} |
48
|
|
|
|
49
|
|
|
/** |
50
|
|
|
* Returns the array of differences. |
51
|
|
|
*/ |
52
|
|
|
public function getDiff() |
53
|
|
|
{ |
54
|
|
|
return $this->_edits; |
55
|
|
|
} |
56
|
|
|
|
57
|
|
|
/** |
58
|
|
|
* Computes a reversed diff. |
59
|
|
|
* |
60
|
|
|
* Example: |
61
|
|
|
* <code> |
62
|
|
|
* $diff = new Text_Diff($lines1, $lines2); |
63
|
|
|
* $rev = $diff->reverse(); |
64
|
|
|
* </code> |
65
|
|
|
* |
66
|
|
|
* @return Text_Diff A Diff object representing the inverse of the |
67
|
|
|
* original diff. Note that we purposely don't return a |
68
|
|
|
* reference here, since this essentially is a clone() |
69
|
|
|
* method. |
70
|
|
|
*/ |
71
|
|
|
public function reverse() |
72
|
|
|
{ |
73
|
|
|
if (version_compare(zend_version(), '2', '>')) { |
74
|
|
|
$rev = clone$obj; |
|
|
|
|
75
|
|
|
} else { |
76
|
|
|
$rev = $this; |
77
|
|
|
} |
78
|
|
|
$rev->_edits = array(); |
79
|
|
|
foreach ($this->_edits as $edit) { |
80
|
|
|
$rev->_edits[] = $edit->reverse(); |
81
|
|
|
} |
82
|
|
|
return $rev; |
83
|
|
|
} |
84
|
|
|
|
85
|
|
|
/** |
86
|
|
|
* Checks for an empty diff. |
87
|
|
|
* |
88
|
|
|
* @return boolean True if two sequences were identical. |
89
|
|
|
*/ |
90
|
|
|
public function isEmpty() |
91
|
|
|
{ |
92
|
|
|
foreach ($this->_edits as $edit) { |
93
|
|
|
if (!is_a($edit, 'Text_Diff_Op_copy')) { |
94
|
|
|
return false; |
95
|
|
|
} |
96
|
|
|
} |
97
|
|
|
return true; |
98
|
|
|
} |
99
|
|
|
|
100
|
|
|
/** |
101
|
|
|
* Computes the length of the Longest Common Subsequence (LCS). |
102
|
|
|
* |
103
|
|
|
* This is mostly for diagnostic purposes. |
104
|
|
|
* |
105
|
|
|
* @return integer The length of the LCS. |
106
|
|
|
*/ |
107
|
|
|
public function lcs() |
108
|
|
|
{ |
109
|
|
|
$lcs = 0; |
110
|
|
|
foreach ($this->_edits as $edit) { |
111
|
|
|
if (is_a($edit, 'Text_Diff_Op_copy')) { |
112
|
|
|
$lcs += count($edit->orig); |
113
|
|
|
} |
114
|
|
|
} |
115
|
|
|
return $lcs; |
116
|
|
|
} |
117
|
|
|
|
118
|
|
|
/** |
119
|
|
|
* Gets the original set of lines. |
120
|
|
|
* |
121
|
|
|
* This reconstructs the $from_lines parameter passed to the constructor. |
122
|
|
|
* |
123
|
|
|
* @return array The original sequence of strings. |
124
|
|
|
*/ |
125
|
|
View Code Duplication |
public function getOriginal() |
|
|
|
|
126
|
|
|
{ |
127
|
|
|
$lines = array(); |
128
|
|
|
foreach ($this->_edits as $edit) { |
129
|
|
|
if ($edit->orig) { |
130
|
|
|
array_splice($lines, count($lines), 0, $edit->orig); |
131
|
|
|
} |
132
|
|
|
} |
133
|
|
|
return $lines; |
134
|
|
|
} |
135
|
|
|
|
136
|
|
|
/** |
137
|
|
|
* Gets the final set of lines. |
138
|
|
|
* |
139
|
|
|
* This reconstructs the $to_lines parameter passed to the constructor. |
140
|
|
|
* |
141
|
|
|
* @return array The sequence of strings. |
142
|
|
|
*/ |
143
|
|
View Code Duplication |
public function getFinal() |
|
|
|
|
144
|
|
|
{ |
145
|
|
|
$lines = array(); |
146
|
|
|
foreach ($this->_edits as $edit) { |
147
|
|
|
if ($edit->final) { |
148
|
|
|
array_splice($lines, count($lines), 0, $edit->final); |
149
|
|
|
} |
150
|
|
|
} |
151
|
|
|
return $lines; |
152
|
|
|
} |
153
|
|
|
|
154
|
|
|
/** |
155
|
|
|
* Removes trailing newlines from a line of text. This is meant to be used |
156
|
|
|
* with array_walk(). |
157
|
|
|
* |
158
|
|
|
* @param string $line The line to trim. |
159
|
|
|
* @param integer $key The index of the line in the array. Not used. |
160
|
|
|
*/ |
161
|
|
|
public function _trimNewlines(&$line, $key) |
|
|
|
|
162
|
|
|
{ |
163
|
|
|
$line = str_replace(array("\n", "\r"), '', $line); |
164
|
|
|
} |
165
|
|
|
|
166
|
|
|
/** |
167
|
|
|
* Checks a diff for validity. |
168
|
|
|
* |
169
|
|
|
* This is here only for debugging purposes. |
170
|
|
|
*/ |
171
|
|
|
public function _check($from_lines, $to_lines) |
172
|
|
|
{ |
173
|
|
|
if (serialize($from_lines) != serialize($this->getOriginal())) { |
174
|
|
|
trigger_error("Reconstructed original doesn't match", E_USER_ERROR); |
175
|
|
|
} |
176
|
|
|
if (serialize($to_lines) != serialize($this->getFinal())) { |
177
|
|
|
trigger_error("Reconstructed final doesn't match", E_USER_ERROR); |
178
|
|
|
} |
179
|
|
|
|
180
|
|
|
$rev = $this->reverse(); |
181
|
|
|
if (serialize($to_lines) != serialize($rev->getOriginal())) { |
182
|
|
|
trigger_error("Reversed original doesn't match", E_USER_ERROR); |
183
|
|
|
} |
184
|
|
|
if (serialize($from_lines) != serialize($rev->getFinal())) { |
185
|
|
|
trigger_error("Reversed final doesn't match", E_USER_ERROR); |
186
|
|
|
} |
187
|
|
|
|
188
|
|
|
$prevtype = null; |
189
|
|
|
foreach ($this->_edits as $edit) { |
190
|
|
|
if ($prevtype == get_class($edit)) { |
191
|
|
|
trigger_error('Edit sequence is non-optimal', E_USER_ERROR); |
192
|
|
|
} |
193
|
|
|
$prevtype = get_class($edit); |
194
|
|
|
} |
195
|
|
|
|
196
|
|
|
return true; |
197
|
|
|
} |
198
|
|
|
} |
199
|
|
|
|
200
|
|
|
/** |
201
|
|
|
* $Horde: framework/Text_Diff/Diff.php,v 1.13 2005/05/26 20:26:06 selsky Exp $ |
202
|
|
|
* |
203
|
|
|
* @package Text_Diff |
204
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
205
|
|
|
*/ |
206
|
|
|
class Text_MappedDiff extends Text_Diff |
207
|
|
|
{ |
208
|
|
|
|
209
|
|
|
/** |
210
|
|
|
* Computes a diff between sequences of strings. |
211
|
|
|
* |
212
|
|
|
* This can be used to compute things like case-insensitve diffs, or diffs |
213
|
|
|
* which ignore changes in white-space. |
214
|
|
|
* |
215
|
|
|
* @param array $from_lines An array of strings. |
216
|
|
|
* @param array $to_lines An array of strings. |
217
|
|
|
* @param array $mapped_from_lines This array should have the same size |
218
|
|
|
* number of elements as $from_lines. The |
219
|
|
|
* elements in $mapped_from_lines and |
220
|
|
|
* $mapped_to_lines are what is actually |
221
|
|
|
* compared when computing the diff. |
222
|
|
|
* @param array $mapped_to_lines This array should have the same number |
223
|
|
|
* of elements as $to_lines. |
224
|
|
|
*/ |
225
|
|
|
public function __construct($from_lines, $to_lines, |
226
|
|
|
$mapped_from_lines, $mapped_to_lines) |
227
|
|
|
{ |
228
|
|
|
assert(count($from_lines) == count($mapped_from_lines)); |
229
|
|
|
assert(count($to_lines) == count($mapped_to_lines)); |
230
|
|
|
|
231
|
|
|
parent::__construct($mapped_from_lines, $mapped_to_lines); |
232
|
|
|
|
233
|
|
|
$xi = $yi = 0; |
234
|
|
|
for ($i = 0; $i < count($this->_edits); $i++) { |
|
|
|
|
235
|
|
|
$orig = &$this->_edits[$i]->orig; |
236
|
|
|
if (is_array($orig)) { |
237
|
|
|
$orig = array_slice($from_lines, $xi, count($orig)); |
238
|
|
|
$xi += count($orig); |
239
|
|
|
} |
240
|
|
|
|
241
|
|
|
$final = &$this->_edits[$i]->final; |
242
|
|
|
if (is_array($final)) { |
243
|
|
|
$final = array_slice($to_lines, $yi, count($final)); |
244
|
|
|
$yi += count($final); |
245
|
|
|
} |
246
|
|
|
} |
247
|
|
|
} |
248
|
|
|
} |
249
|
|
|
|
250
|
|
|
/** |
251
|
|
|
* Class used internally by Diff to actually compute the diffs. This class |
252
|
|
|
* uses the xdiff PECL package (http://pecl.php.net/package/xdiff) to compute |
253
|
|
|
* the differences between the two input arrays. |
254
|
|
|
* |
255
|
|
|
* @author Jon Parise <[email protected]> |
256
|
|
|
* @package Text_Diff |
257
|
|
|
* |
258
|
|
|
* @access private |
259
|
|
|
*/ |
260
|
|
|
class Text_Diff_Engine_xdiff |
261
|
|
|
{ |
262
|
|
|
|
263
|
|
|
public function diff($from_lines, $to_lines) |
264
|
|
|
{ |
265
|
|
|
/* Convert the two input arrays into strings for xdiff processing. */ |
266
|
|
|
$from_string = implode("\n", $from_lines); |
267
|
|
|
$to_string = implode("\n", $to_lines); |
268
|
|
|
|
269
|
|
|
/* Diff the two strings and convert the result to an array. */ |
270
|
|
|
$diff = xdiff_string_diff($from_string, $to_string, count($to_lines)); |
271
|
|
|
$diff = explode("\n", $diff); |
272
|
|
|
|
273
|
|
|
/* Walk through the diff one line at a time. We build the $edits |
274
|
|
|
* array of diff operations by reading the first character of the |
275
|
|
|
* xdiff output (which is in the "unified diff" format). |
276
|
|
|
* |
277
|
|
|
* Note that we don't have enough information to detect "changed" |
278
|
|
|
* lines using this approach, so we can't add Text_Diff_Op_changed |
279
|
|
|
* instances to the $edits array. The result is still perfectly |
280
|
|
|
* valid, albeit a little less descriptive and efficient. */ |
281
|
|
|
$edits = array(); |
282
|
|
|
foreach ($diff as $line) { |
283
|
|
|
switch ($line[0]) { |
284
|
|
|
case ' ': |
285
|
|
|
$edits[] = new Text_Diff_Op_copy(array(substr($line, 1))); |
286
|
|
|
break; |
287
|
|
|
|
288
|
|
|
case '+': |
289
|
|
|
$edits[] = new Text_Diff_Op_add(array(substr($line, 1))); |
290
|
|
|
break; |
291
|
|
|
|
292
|
|
|
case '-': |
293
|
|
|
$edits[] = new Text_Diff_Op_delete(array(substr($line, 1))); |
294
|
|
|
break; |
295
|
|
|
} |
296
|
|
|
} |
297
|
|
|
|
298
|
|
|
return $edits; |
299
|
|
|
} |
300
|
|
|
} |
301
|
|
|
|
302
|
|
|
/** |
303
|
|
|
* Class used internally by Diff to actually compute the diffs. This class is |
304
|
|
|
* implemented using native PHP code. |
305
|
|
|
* |
306
|
|
|
* The algorithm used here is mostly lifted from the perl module |
307
|
|
|
* Algorithm::Diff (version 1.06) by Ned Konz, which is available at: |
308
|
|
|
* http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip |
309
|
|
|
* |
310
|
|
|
* More ideas are taken from: |
311
|
|
|
* http://www.ics.uci.edu/~eppstein/161/960229.html |
312
|
|
|
* |
313
|
|
|
* Some ideas (and a bit of code) are taken from analyze.c, of GNU |
314
|
|
|
* diffutils-2.7, which can be found at: |
315
|
|
|
* ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz |
316
|
|
|
* |
317
|
|
|
* Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from |
318
|
|
|
* Geoffrey T. Dairiki <[email protected]>. The original PHP version of this |
319
|
|
|
* code was written by him, and is used/adapted with his permission. |
320
|
|
|
* |
321
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
322
|
|
|
* @package Text_Diff |
323
|
|
|
* |
324
|
|
|
* @access private |
325
|
|
|
*/ |
326
|
|
|
class Text_Diff_Engine_native |
327
|
|
|
{ |
328
|
|
|
|
329
|
|
|
public function diff($from_lines, $to_lines) |
330
|
|
|
{ |
331
|
|
|
$n_from = count($from_lines); |
332
|
|
|
$n_to = count($to_lines); |
333
|
|
|
|
334
|
|
|
$this->xchanged = $this->ychanged = array(); |
|
|
|
|
335
|
|
|
$this->xv = $this->yv = array(); |
|
|
|
|
336
|
|
|
$this->xind = $this->yind = array(); |
|
|
|
|
337
|
|
|
unset($this->seq); |
338
|
|
|
unset($this->in_seq); |
339
|
|
|
unset($this->lcs); |
340
|
|
|
|
341
|
|
|
// Skip leading common lines. |
342
|
|
|
for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { |
343
|
|
|
if ($from_lines[$skip] != $to_lines[$skip]) { |
344
|
|
|
break; |
345
|
|
|
} |
346
|
|
|
$this->xchanged[$skip] = $this->ychanged[$skip] = false; |
347
|
|
|
} |
348
|
|
|
|
349
|
|
|
// Skip trailing common lines. |
350
|
|
|
$xi = $n_from; |
351
|
|
|
$yi = $n_to; |
352
|
|
|
for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { |
353
|
|
|
if ($from_lines[$xi] != $to_lines[$yi]) { |
354
|
|
|
break; |
355
|
|
|
} |
356
|
|
|
$this->xchanged[$xi] = $this->ychanged[$yi] = false; |
357
|
|
|
} |
358
|
|
|
|
359
|
|
|
// Ignore lines which do not exist in both files. |
360
|
|
|
for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { |
361
|
|
|
$xhash[$from_lines[$xi]] = 1; |
|
|
|
|
362
|
|
|
} |
363
|
|
|
for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { |
364
|
|
|
$line = $to_lines[$yi]; |
365
|
|
|
if ($this->ychanged[$yi] = empty($xhash[$line])) { |
366
|
|
|
continue; |
367
|
|
|
} |
368
|
|
|
$yhash[$line] = 1; |
|
|
|
|
369
|
|
|
$this->yv[] = $line; |
370
|
|
|
$this->yind[] = $yi; |
371
|
|
|
} |
372
|
|
|
for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { |
373
|
|
|
$line = $from_lines[$xi]; |
374
|
|
|
if ($this->xchanged[$xi] = empty($yhash[$line])) { |
375
|
|
|
continue; |
376
|
|
|
} |
377
|
|
|
$this->xv[] = $line; |
378
|
|
|
$this->xind[] = $xi; |
379
|
|
|
} |
380
|
|
|
|
381
|
|
|
// Find the LCS. |
382
|
|
|
$this->_compareseq(0, count($this->xv), 0, count($this->yv)); |
383
|
|
|
|
384
|
|
|
// Merge edits when possible. |
385
|
|
|
$this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged); |
386
|
|
|
$this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged); |
387
|
|
|
|
388
|
|
|
// Compute the edit operations. |
389
|
|
|
$edits = array(); |
390
|
|
|
$xi = $yi = 0; |
391
|
|
|
while ($xi < $n_from || $yi < $n_to) { |
392
|
|
|
assert($yi < $n_to || $this->xchanged[$xi]); |
393
|
|
|
assert($xi < $n_from || $this->ychanged[$yi]); |
394
|
|
|
|
395
|
|
|
// Skip matching "snake". |
396
|
|
|
$copy = array(); |
397
|
|
|
while ($xi < $n_from && $yi < $n_to |
398
|
|
|
&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) { |
399
|
|
|
$copy[] = $from_lines[$xi++]; |
400
|
|
|
++$yi; |
401
|
|
|
} |
402
|
|
|
if ($copy) { |
|
|
|
|
403
|
|
|
$edits[] = new Text_Diff_Op_copy($copy); |
404
|
|
|
} |
405
|
|
|
|
406
|
|
|
// Find deletes & adds. |
407
|
|
|
$delete = array(); |
408
|
|
|
while ($xi < $n_from && $this->xchanged[$xi]) { |
409
|
|
|
$delete[] = $from_lines[$xi++]; |
410
|
|
|
} |
411
|
|
|
|
412
|
|
|
$add = array(); |
413
|
|
|
while ($yi < $n_to && $this->ychanged[$yi]) { |
414
|
|
|
$add[] = $to_lines[$yi++]; |
415
|
|
|
} |
416
|
|
|
|
417
|
|
|
if ($delete && $add) { |
|
|
|
|
418
|
|
|
$edits[] = new Text_Diff_Op_change($delete, $add); |
419
|
|
|
} elseif ($delete) { |
|
|
|
|
420
|
|
|
$edits[] = new Text_Diff_Op_delete($delete); |
421
|
|
|
} elseif ($add) { |
|
|
|
|
422
|
|
|
$edits[] = new Text_Diff_Op_add($add); |
423
|
|
|
} |
424
|
|
|
} |
425
|
|
|
|
426
|
|
|
return $edits; |
427
|
|
|
} |
428
|
|
|
|
429
|
|
|
/** |
430
|
|
|
* Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, |
431
|
|
|
* XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized |
432
|
|
|
* segments. |
433
|
|
|
* |
434
|
|
|
* Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of |
435
|
|
|
* NCHUNKS+1 (X, Y) indexes giving the diving points between sub |
436
|
|
|
* sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), |
437
|
|
|
* the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == |
438
|
|
|
* (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). |
439
|
|
|
* |
440
|
|
|
* This function assumes that the first lines of the specified portions of |
441
|
|
|
* the two files do not match, and likewise that the last lines do not |
442
|
|
|
* match. The caller must trim matching lines from the beginning and end |
443
|
|
|
* of the portions it is going to specify. |
444
|
|
|
*/ |
445
|
|
|
public function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) |
446
|
|
|
{ |
447
|
|
|
$flip = false; |
448
|
|
|
|
449
|
|
|
if ($xlim - $xoff > $ylim - $yoff) { |
450
|
|
|
/* Things seems faster (I'm not sure I understand why) when the |
451
|
|
|
* shortest sequence is in X. */ |
452
|
|
|
$flip = true; |
453
|
|
|
list($xoff, $xlim, $yoff, $ylim) |
454
|
|
|
= array($yoff, $ylim, $xoff, $xlim); |
455
|
|
|
} |
456
|
|
|
|
457
|
|
|
if ($flip) { |
458
|
|
|
for ($i = $ylim - 1; $i >= $yoff; $i--) { |
459
|
|
|
$ymatches[$this->xv[$i]][] = $i; |
|
|
|
|
460
|
|
|
} |
461
|
|
|
} else { |
462
|
|
|
for ($i = $ylim - 1; $i >= $yoff; $i--) { |
463
|
|
|
$ymatches[$this->yv[$i]][] = $i; |
|
|
|
|
464
|
|
|
} |
465
|
|
|
} |
466
|
|
|
|
467
|
|
|
$this->lcs = 0; |
|
|
|
|
468
|
|
|
$this->seq[0]= $yoff - 1; |
|
|
|
|
469
|
|
|
$this->in_seq = array(); |
|
|
|
|
470
|
|
|
$ymids[0] = array(); |
|
|
|
|
471
|
|
|
|
472
|
|
|
$numer = $xlim - $xoff + $nchunks - 1; |
473
|
|
|
$x = $xoff; |
474
|
|
|
for ($chunk = 0; $chunk < $nchunks; $chunk++) { |
475
|
|
|
if ($chunk > 0) { |
476
|
|
|
for ($i = 0; $i <= $this->lcs; $i++) { |
477
|
|
|
$ymids[$i][$chunk - 1] = $this->seq[$i]; |
478
|
|
|
} |
479
|
|
|
} |
480
|
|
|
|
481
|
|
|
$x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks); |
482
|
|
|
for (; $x < $x1; $x++) { |
483
|
|
|
$line = $flip ? $this->yv[$x] : $this->xv[$x]; |
484
|
|
|
if (empty($ymatches[$line])) { |
485
|
|
|
continue; |
486
|
|
|
} |
487
|
|
|
$matches = $ymatches[$line]; |
488
|
|
|
foreach ($matches as $y) { |
489
|
|
View Code Duplication |
if (empty($this->in_seq[$y])) { |
|
|
|
|
490
|
|
|
$k = $this->_lcsPos($y); |
491
|
|
|
assert($k > 0); |
492
|
|
|
$ymids[$k] = $ymids[$k - 1]; |
493
|
|
|
break; |
494
|
|
|
} |
495
|
|
|
} |
496
|
|
|
|
497
|
|
|
while (list($junk, $y) = each($matches)) { |
|
|
|
|
498
|
|
|
if ($y > $this->seq[$k - 1]) { |
499
|
|
|
// assert($y < $this->seq[$k]); // GIJ Patched |
|
|
|
|
500
|
|
|
/* Optimization: this is a common case: next match is |
501
|
|
|
* just replacing previous match. */ |
502
|
|
|
$this->in_seq[$this->seq[$k]] = false; |
|
|
|
|
503
|
|
|
$this->seq[$k] = $y; |
504
|
|
|
$this->in_seq[$y] = 1; |
505
|
|
View Code Duplication |
} elseif (empty($this->in_seq[$y])) { |
|
|
|
|
506
|
|
|
$k = $this->_lcsPos($y); |
507
|
|
|
assert($k > 0); |
508
|
|
|
$ymids[$k] = $ymids[$k - 1]; |
509
|
|
|
} |
510
|
|
|
} |
511
|
|
|
} |
512
|
|
|
} |
513
|
|
|
|
514
|
|
|
$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); |
|
|
|
|
515
|
|
|
$ymid = $ymids[$this->lcs]; |
516
|
|
|
for ($n = 0; $n < $nchunks - 1; $n++) { |
517
|
|
|
$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); |
518
|
|
|
$y1 = $ymid[$n] + 1; |
519
|
|
|
$seps[] = $flip ? array($y1, $x1) : array($x1, $y1); |
520
|
|
|
} |
521
|
|
|
$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); |
522
|
|
|
|
523
|
|
|
return array($this->lcs, $seps); |
524
|
|
|
} |
525
|
|
|
|
526
|
|
|
public function _lcsPos($ypos) |
527
|
|
|
{ |
528
|
|
|
$end = $this->lcs; |
529
|
|
|
if ($end == 0 || $ypos > $this->seq[$end]) { |
530
|
|
|
$this->seq[++$this->lcs] = $ypos; |
531
|
|
|
$this->in_seq[$ypos] = 1; |
532
|
|
|
return $this->lcs; |
533
|
|
|
} |
534
|
|
|
|
535
|
|
|
$beg = 1; |
536
|
|
|
while ($beg < $end) { |
537
|
|
|
$mid = (int)(($beg + $end) / 2); |
538
|
|
|
if ($ypos > $this->seq[$mid]) { |
539
|
|
|
$beg = $mid + 1; |
540
|
|
|
} else { |
541
|
|
|
$end = $mid; |
542
|
|
|
} |
543
|
|
|
} |
544
|
|
|
|
545
|
|
|
assert($ypos != $this->seq[$end]); |
546
|
|
|
|
547
|
|
|
$this->in_seq[$this->seq[$end]] = false; |
548
|
|
|
$this->seq[$end] = $ypos; |
549
|
|
|
$this->in_seq[$ypos] = 1; |
550
|
|
|
return $end; |
551
|
|
|
} |
552
|
|
|
|
553
|
|
|
/** |
554
|
|
|
* Finds LCS of two sequences. |
555
|
|
|
* |
556
|
|
|
* The results are recorded in the vectors $this->{x,y}changed[], by |
557
|
|
|
* storing a 1 in the element for each line that is an insertion or |
558
|
|
|
* deletion (ie. is not in the LCS). |
559
|
|
|
* |
560
|
|
|
* The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1. |
561
|
|
|
* |
562
|
|
|
* Note that XLIM, YLIM are exclusive bounds. All line numbers are |
563
|
|
|
* origin-0 and discarded lines are not counted. |
564
|
|
|
*/ |
565
|
|
|
public function _compareseq($xoff, $xlim, $yoff, $ylim) |
566
|
|
|
{ |
567
|
|
|
/* Slide down the bottom initial diagonal. */ |
568
|
|
|
while ($xoff < $xlim && $yoff < $ylim |
569
|
|
|
&& $this->xv[$xoff] == $this->yv[$yoff]) { |
570
|
|
|
++$xoff; |
571
|
|
|
++$yoff; |
572
|
|
|
} |
573
|
|
|
|
574
|
|
|
/* Slide up the top initial diagonal. */ |
575
|
|
|
while ($xlim > $xoff && $ylim > $yoff |
576
|
|
|
&& $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { |
577
|
|
|
--$xlim; |
578
|
|
|
--$ylim; |
579
|
|
|
} |
580
|
|
|
|
581
|
|
|
if ($xoff == $xlim || $yoff == $ylim) { |
582
|
|
|
$lcs = 0; |
583
|
|
|
} else { |
584
|
|
|
/* This is ad hoc but seems to work well. $nchunks = |
|
|
|
|
585
|
|
|
* sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks = |
586
|
|
|
* max(2,min(8,(int)$nchunks)); */ |
587
|
|
|
$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; |
588
|
|
|
list($lcs, $seps) |
589
|
|
|
= $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks); |
590
|
|
|
} |
591
|
|
|
|
592
|
|
|
if ($lcs == 0) { |
593
|
|
|
/* X and Y sequences have no common subsequence: mark all |
594
|
|
|
* changed. */ |
595
|
|
|
while ($yoff < $ylim) { |
596
|
|
|
$this->ychanged[$this->yind[$yoff++]] = 1; |
597
|
|
|
} |
598
|
|
|
while ($xoff < $xlim) { |
599
|
|
|
$this->xchanged[$this->xind[$xoff++]] = 1; |
600
|
|
|
} |
601
|
|
|
} else { |
602
|
|
|
/* Use the partitions to split this problem into subproblems. */ |
603
|
|
|
reset($seps); |
604
|
|
|
$pt1 = $seps[0]; |
605
|
|
|
while ($pt2 = next($seps)) { |
606
|
|
|
$this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]); |
607
|
|
|
$pt1 = $pt2; |
608
|
|
|
} |
609
|
|
|
} |
610
|
|
|
} |
611
|
|
|
|
612
|
|
|
/** |
613
|
|
|
* Adjusts inserts/deletes of identical lines to join changes as much as |
614
|
|
|
* possible. |
615
|
|
|
* |
616
|
|
|
* We do something when a run of changed lines include a line at one end |
617
|
|
|
* and has an excluded, identical line at the other. We are free to |
618
|
|
|
* choose which identical line is included. `compareseq' usually chooses |
619
|
|
|
* the one at the beginning, but usually it is cleaner to consider the |
620
|
|
|
* following identical line to be the "change". |
621
|
|
|
* |
622
|
|
|
* This is extracted verbatim from analyze.c (GNU diffutils-2.7). |
623
|
|
|
*/ |
624
|
|
|
public function _shiftBoundaries($lines, &$changed, $other_changed) |
625
|
|
|
{ |
626
|
|
|
$i = 0; |
627
|
|
|
$j = 0; |
628
|
|
|
|
629
|
|
|
assert('count($lines) == count($changed)'); |
630
|
|
|
$len = count($lines); |
631
|
|
|
$other_len = count($other_changed); |
632
|
|
|
|
633
|
|
|
while (1) { |
634
|
|
|
/* Scan forward to find the beginning of another run of |
635
|
|
|
* changes. Also keep track of the corresponding point in the |
636
|
|
|
* other file. |
637
|
|
|
* |
638
|
|
|
* Throughout this code, $i and $j are adjusted together so that |
639
|
|
|
* the first $i elements of $changed and the first $j elements of |
640
|
|
|
* $other_changed both contain the same number of zeros (unchanged |
641
|
|
|
* lines). |
642
|
|
|
* |
643
|
|
|
* Furthermore, $j is always kept so that $j == $other_len or |
644
|
|
|
* $other_changed[$j] == false. */ |
645
|
|
|
while ($j < $other_len && $other_changed[$j]) { |
646
|
|
|
$j++; |
647
|
|
|
} |
648
|
|
|
|
649
|
|
|
while ($i < $len && ! $changed[$i]) { |
650
|
|
|
assert('$j < $other_len && ! $other_changed[$j]'); |
651
|
|
|
$i++; |
652
|
|
|
$j++; |
653
|
|
|
while ($j < $other_len && $other_changed[$j]) { |
654
|
|
|
$j++; |
655
|
|
|
} |
656
|
|
|
} |
657
|
|
|
|
658
|
|
|
if ($i == $len) { |
659
|
|
|
break; |
660
|
|
|
} |
661
|
|
|
|
662
|
|
|
$start = $i; |
663
|
|
|
|
664
|
|
|
/* Find the end of this run of changes. */ |
665
|
|
|
while (++$i < $len && $changed[$i]) { |
666
|
|
|
continue; |
667
|
|
|
} |
668
|
|
|
|
669
|
|
|
do { |
670
|
|
|
/* Record the length of this run of changes, so that we can |
671
|
|
|
* later determine whether the run has grown. */ |
672
|
|
|
$runlength = $i - $start; |
673
|
|
|
|
674
|
|
|
/* Move the changed region back, so long as the previous |
675
|
|
|
* unchanged line matches the last changed one. This merges |
676
|
|
|
* with previous changed regions. */ |
677
|
|
|
while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { |
678
|
|
|
$changed[--$start] = 1; |
679
|
|
|
$changed[--$i] = false; |
680
|
|
|
while ($start > 0 && $changed[$start - 1]) { |
681
|
|
|
$start--; |
682
|
|
|
} |
683
|
|
|
assert('$j > 0'); |
684
|
|
|
while ($other_changed[--$j]) { |
685
|
|
|
continue; |
686
|
|
|
} |
687
|
|
|
assert('$j >= 0 && !$other_changed[$j]'); |
688
|
|
|
} |
689
|
|
|
|
690
|
|
|
/* Set CORRESPONDING to the end of the changed run, at the |
691
|
|
|
* last point where it corresponds to a changed run in the |
692
|
|
|
* other file. CORRESPONDING == LEN means no such point has |
693
|
|
|
* been found. */ |
694
|
|
|
$corresponding = $j < $other_len ? $i : $len; |
695
|
|
|
|
696
|
|
|
/* Move the changed region forward, so long as the first |
697
|
|
|
* changed line matches the following unchanged one. This |
698
|
|
|
* merges with following changed regions. Do this second, so |
699
|
|
|
* that if there are no merges, the changed region is moved |
700
|
|
|
* forward as far as possible. */ |
701
|
|
|
while ($i < $len && $lines[$start] == $lines[$i]) { |
702
|
|
|
$changed[$start++] = false; |
703
|
|
|
$changed[$i++] = 1; |
704
|
|
|
while ($i < $len && $changed[$i]) { |
705
|
|
|
$i++; |
706
|
|
|
} |
707
|
|
|
|
708
|
|
|
assert('$j < $other_len && ! $other_changed[$j]'); |
709
|
|
|
$j++; |
710
|
|
|
if ($j < $other_len && $other_changed[$j]) { |
711
|
|
|
$corresponding = $i; |
712
|
|
|
while ($j < $other_len && $other_changed[$j]) { |
713
|
|
|
$j++; |
714
|
|
|
} |
715
|
|
|
} |
716
|
|
|
} |
717
|
|
|
} while ($runlength != $i - $start); |
718
|
|
|
|
719
|
|
|
/* If possible, move the fully-merged run of changes back to a |
720
|
|
|
* corresponding run in the other file. */ |
721
|
|
|
while ($corresponding < $i) { |
722
|
|
|
$changed[--$start] = 1; |
723
|
|
|
$changed[--$i] = 0; |
724
|
|
|
assert('$j > 0'); |
725
|
|
|
while ($other_changed[--$j]) { |
726
|
|
|
continue; |
727
|
|
|
} |
728
|
|
|
assert('$j >= 0 && !$other_changed[$j]'); |
729
|
|
|
} |
730
|
|
|
} |
731
|
|
|
} |
732
|
|
|
} |
733
|
|
|
|
734
|
|
|
/** |
735
|
|
|
* @package Text_Diff |
736
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
737
|
|
|
* |
738
|
|
|
* @access private |
739
|
|
|
*/ |
740
|
|
|
class Text_Diff_Op |
741
|
|
|
{ |
742
|
|
|
|
743
|
|
|
public $orig; |
744
|
|
|
public $final; |
745
|
|
|
|
746
|
|
|
public function reverse() |
747
|
|
|
{ |
748
|
|
|
trigger_error('Abstract method', E_USER_ERROR); |
749
|
|
|
} |
750
|
|
|
|
751
|
|
|
public function norig() |
752
|
|
|
{ |
753
|
|
|
return $this->orig ? count($this->orig) : 0; |
754
|
|
|
} |
755
|
|
|
|
756
|
|
|
public function nfinal() |
757
|
|
|
{ |
758
|
|
|
return $this->final ? count($this->final) : 0; |
759
|
|
|
} |
760
|
|
|
} |
761
|
|
|
|
762
|
|
|
/** |
763
|
|
|
* @package Text_Diff |
764
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
765
|
|
|
* |
766
|
|
|
* @access private |
767
|
|
|
*/ |
768
|
|
|
class Text_Diff_Op_copy extends Text_Diff_Op |
769
|
|
|
{ |
770
|
|
|
|
771
|
|
|
public function __construct($orig, $final = false) |
772
|
|
|
{ |
773
|
|
|
if (!is_array($final)) { |
774
|
|
|
$final = $orig; |
775
|
|
|
} |
776
|
|
|
$this->orig = $orig; |
777
|
|
|
$this->final = $final; |
778
|
|
|
} |
779
|
|
|
|
780
|
|
|
public function &reverse() |
781
|
|
|
{ |
782
|
|
|
return $reverse = new Text_Diff_Op_copy($this->final, $this->orig); |
|
|
|
|
783
|
|
|
} |
784
|
|
|
} |
785
|
|
|
|
786
|
|
|
/** |
787
|
|
|
* @package Text_Diff |
788
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
789
|
|
|
* |
790
|
|
|
* @access private |
791
|
|
|
*/ |
792
|
|
View Code Duplication |
class Text_Diff_Op_delete extends Text_Diff_Op |
|
|
|
|
793
|
|
|
{ |
794
|
|
|
|
795
|
|
|
public function __construct($lines) |
796
|
|
|
{ |
797
|
|
|
$this->orig = $lines; |
798
|
|
|
$this->final = false; |
799
|
|
|
} |
800
|
|
|
|
801
|
|
|
public function &reverse() |
802
|
|
|
{ |
803
|
|
|
return $reverse = new Text_Diff_Op_add($this->orig); |
|
|
|
|
804
|
|
|
} |
805
|
|
|
} |
806
|
|
|
|
807
|
|
|
/** |
808
|
|
|
* @package Text_Diff |
809
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
810
|
|
|
* |
811
|
|
|
* @access private |
812
|
|
|
*/ |
813
|
|
View Code Duplication |
class Text_Diff_Op_add extends Text_Diff_Op |
|
|
|
|
814
|
|
|
{ |
815
|
|
|
|
816
|
|
|
public function __construct($lines) |
817
|
|
|
{ |
818
|
|
|
$this->final = $lines; |
819
|
|
|
$this->orig = false; |
820
|
|
|
} |
821
|
|
|
|
822
|
|
|
public function &reverse() |
823
|
|
|
{ |
824
|
|
|
return $reverse = new Text_Diff_Op_delete($this->final); |
|
|
|
|
825
|
|
|
} |
826
|
|
|
} |
827
|
|
|
|
828
|
|
|
/** |
829
|
|
|
* @package Text_Diff |
830
|
|
|
* @author Geoffrey T. Dairiki <[email protected]> |
831
|
|
|
* |
832
|
|
|
* @access private |
833
|
|
|
*/ |
834
|
|
|
class Text_Diff_Op_change extends Text_Diff_Op |
835
|
|
|
{ |
836
|
|
|
|
837
|
|
|
public function __construct($orig, $final) |
838
|
|
|
{ |
839
|
|
|
$this->orig = $orig; |
840
|
|
|
$this->final = $final; |
841
|
|
|
} |
842
|
|
|
|
843
|
|
|
public function &reverse() |
844
|
|
|
{ |
845
|
|
|
return $reverse = new Text_Diff_Op_change($this->final, $this->orig); |
|
|
|
|
846
|
|
|
} |
847
|
|
|
} |
848
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.