1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* FINE granularity DIFF. |
4
|
|
|
* |
5
|
|
|
* Computes a set of instructions to convert the content of |
6
|
|
|
* one string into another. |
7
|
|
|
* |
8
|
|
|
* Copyright (c) 2011 Raymond Hill (http://raymondhill.net/blog/?p=441) |
9
|
|
|
* |
10
|
|
|
* Licensed under The MIT License |
11
|
|
|
* |
12
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy |
13
|
|
|
* of this software and associated documentation files (the "Software"), to deal |
14
|
|
|
* in the Software without restriction, including without limitation the rights |
15
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
16
|
|
|
* copies of the Software, and to permit persons to whom the Software is |
17
|
|
|
* furnished to do so, subject to the following conditions: |
18
|
|
|
* |
19
|
|
|
* The above copyright notice and this permission notice shall be included in |
20
|
|
|
* all copies or substantial portions of the Software. |
21
|
|
|
* |
22
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
23
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
24
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
25
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
26
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
27
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
28
|
|
|
* THE SOFTWARE. |
29
|
|
|
* |
30
|
|
|
* @copyright Copyright 2011 (c) Raymond Hill (http://raymondhill.net/blog/?p=441) |
31
|
|
|
* |
32
|
|
|
* @link http://www.raymondhill.net/finediff/ |
33
|
|
|
* |
34
|
|
|
* @version 0.6 |
35
|
|
|
* |
36
|
|
|
* @license MIT License (http://www.opensource.org/licenses/mit-license.php) |
37
|
|
|
*/ |
38
|
|
|
|
39
|
|
|
/** |
40
|
|
|
* Usage (simplest):. |
41
|
|
|
* |
42
|
|
|
* include 'finediff.php'; |
43
|
|
|
* |
44
|
|
|
* // for the stock stack, granularity values are: |
45
|
|
|
* // FineDiff::$paragraphGranularity = paragraph/line level |
46
|
|
|
* // FineDiff::$sentenceGranularity = sentence level |
47
|
|
|
* // FineDiff::$wordGranularity = word level |
48
|
|
|
* // FineDiff::$characterGranularity = character level [default] |
49
|
|
|
* |
50
|
|
|
* $opcodes = FineDiff::getDiffOpcodes($from_text, $to_text [, $granularityStack = null] ); |
51
|
|
|
* // store opcodes for later use... |
52
|
|
|
* |
53
|
|
|
* ... |
54
|
|
|
* |
55
|
|
|
* // restore $to_text from $from_text + $opcodes |
56
|
|
|
* include 'finediff.php'; |
57
|
|
|
* $to_text = FineDiff::renderToTextFromOpcodes($from_text, $opcodes); |
58
|
|
|
* |
59
|
|
|
* ... |
60
|
|
|
*/ |
61
|
|
|
|
62
|
|
|
/** |
63
|
|
|
* Persisted opcodes (string) are a sequence of atomic opcode. |
64
|
|
|
* A single opcode can be one of the following: |
65
|
|
|
* c | c{n} | d | d{n} | i:{c} | i{length}:{s} |
66
|
|
|
* 'c' = copy one character from source |
67
|
|
|
* 'c{n}' = copy n characters from source |
68
|
|
|
* 'd' = skip one character from source |
69
|
|
|
* 'd{n}' = skip n characters from source |
70
|
|
|
* 'i:{c} = insert character 'c' |
71
|
|
|
* 'i{n}:{s}' = insert string s, which is of length n. |
72
|
|
|
* |
73
|
|
|
* Do not exist as of now, under consideration: |
74
|
|
|
* 'm{n}:{o} = move n characters from source o characters ahead. |
75
|
|
|
* It would be essentially a shortcut for a delete->copy->insert |
76
|
|
|
* command (swap) for when the inserted segment is exactly the same |
77
|
|
|
* as the deleted one, and with only a copy operation in between. |
78
|
|
|
* TODO: How often this case occurs? Is it worth it? Can only |
79
|
|
|
* be done as a postprocessing method (->optimize()?) |
80
|
|
|
*/ |
81
|
|
|
abstract class FineDiffOp |
82
|
|
|
{ |
83
|
|
|
abstract public function getFromLen(); |
84
|
|
|
|
85
|
|
|
abstract public function getToLen(); |
86
|
|
|
|
87
|
|
|
abstract public function getOpcode(); |
88
|
|
|
} |
89
|
|
|
|
90
|
|
|
class FineDiffDeleteOp extends FineDiffOp |
91
|
|
|
{ |
92
|
|
|
public function __construct($len) |
93
|
|
|
{ |
94
|
|
|
$this->fromLen = $len; |
|
|
|
|
95
|
|
|
} |
96
|
|
|
|
97
|
|
|
public function getFromLen() |
98
|
|
|
{ |
99
|
|
|
return $this->fromLen; |
100
|
|
|
} |
101
|
|
|
|
102
|
|
|
public function getToLen() |
103
|
|
|
{ |
104
|
|
|
return 0; |
105
|
|
|
} |
106
|
|
|
|
107
|
|
|
public function getOpcode() |
108
|
|
|
{ |
109
|
|
|
if ($this->fromLen === 1) { |
110
|
|
|
return 'd'; |
111
|
|
|
} |
112
|
|
|
|
113
|
|
|
return "d{$this->fromLen}"; |
114
|
|
|
} |
115
|
|
|
} |
116
|
|
|
|
117
|
|
|
class FineDiffInsertOp extends FineDiffOp |
118
|
|
|
{ |
119
|
|
|
public function __construct($text) |
120
|
|
|
{ |
121
|
|
|
$this->text = $text; |
|
|
|
|
122
|
|
|
} |
123
|
|
|
|
124
|
|
|
public function getFromLen() |
125
|
|
|
{ |
126
|
|
|
return 0; |
127
|
|
|
} |
128
|
|
|
|
129
|
|
|
public function getToLen() |
130
|
|
|
{ |
131
|
|
|
return strlen($this->text); |
132
|
|
|
} |
133
|
|
|
|
134
|
|
|
public function getText() |
135
|
|
|
{ |
136
|
|
|
return $this->text; |
137
|
|
|
} |
138
|
|
|
|
139
|
|
|
public function getOpcode() |
140
|
|
|
{ |
141
|
|
|
$to_len = strlen($this->text); |
142
|
|
|
if ($to_len === 1) { |
143
|
|
|
return "i:{$this->text}"; |
144
|
|
|
} |
145
|
|
|
|
146
|
|
|
return "i{$to_len}:{$this->text}"; |
147
|
|
|
} |
148
|
|
|
} |
149
|
|
|
|
150
|
|
|
class FineDiffReplaceOp extends FineDiffOp |
151
|
|
|
{ |
152
|
|
|
public function __construct($fromLen, $text) |
153
|
|
|
{ |
154
|
|
|
$this->fromLen = $fromLen; |
|
|
|
|
155
|
|
|
$this->text = $text; |
|
|
|
|
156
|
|
|
} |
157
|
|
|
|
158
|
|
|
public function getFromLen() |
159
|
|
|
{ |
160
|
|
|
return $this->fromLen; |
161
|
|
|
} |
162
|
|
|
|
163
|
|
|
public function getToLen() |
164
|
|
|
{ |
165
|
|
|
return strlen($this->text); |
166
|
|
|
} |
167
|
|
|
|
168
|
|
|
public function getText() |
169
|
|
|
{ |
170
|
|
|
return $this->text; |
171
|
|
|
} |
172
|
|
|
|
173
|
|
|
public function getOpcode() |
174
|
|
|
{ |
175
|
|
|
if ($this->fromLen === 1) { |
176
|
|
|
$del_opcode = 'd'; |
177
|
|
|
} else { |
178
|
|
|
$del_opcode = "d{$this->fromLen}"; |
179
|
|
|
} |
180
|
|
|
$to_len = strlen($this->text); |
181
|
|
|
if ($to_len === 1) { |
182
|
|
|
return "{$del_opcode}i:{$this->text}"; |
183
|
|
|
} |
184
|
|
|
|
185
|
|
|
return "{$del_opcode}i{$to_len}:{$this->text}"; |
186
|
|
|
} |
187
|
|
|
} |
188
|
|
|
|
189
|
|
|
class FineDiffCopyOp extends FineDiffOp |
190
|
|
|
{ |
191
|
|
|
public function __construct($len) |
192
|
|
|
{ |
193
|
|
|
$this->len = $len; |
|
|
|
|
194
|
|
|
} |
195
|
|
|
|
196
|
|
|
public function getFromLen() |
197
|
|
|
{ |
198
|
|
|
return $this->len; |
199
|
|
|
} |
200
|
|
|
|
201
|
|
|
public function getToLen() |
202
|
|
|
{ |
203
|
|
|
return $this->len; |
204
|
|
|
} |
205
|
|
|
|
206
|
|
|
public function getOpcode() |
207
|
|
|
{ |
208
|
|
|
if ($this->len === 1) { |
209
|
|
|
return 'c'; |
210
|
|
|
} |
211
|
|
|
|
212
|
|
|
return "c{$this->len}"; |
213
|
|
|
} |
214
|
|
|
|
215
|
|
|
public function increase($size) |
216
|
|
|
{ |
217
|
|
|
return $this->len += $size; |
218
|
|
|
} |
219
|
|
|
} |
220
|
|
|
|
221
|
|
|
/** |
222
|
|
|
* FineDiff ops. |
223
|
|
|
* |
224
|
|
|
* Collection of ops |
225
|
|
|
*/ |
226
|
|
|
class FineDiffOps |
227
|
|
|
{ |
228
|
|
|
public function appendOpcode($opcode, $from, $from_offset, $from_len) |
229
|
|
|
{ |
230
|
|
|
if ($opcode === 'c') { |
231
|
|
|
$edits[] = new FineDiffCopyOp($from_len); |
|
|
|
|
232
|
|
|
} else { |
233
|
|
|
if ($opcode === 'd') { |
234
|
|
|
$edits[] = new FineDiffDeleteOp($from_len); |
|
|
|
|
235
|
|
|
} else /* if ( $opcode === 'i' ) */ { |
236
|
|
|
$edits[] = new FineDiffInsertOp(substr($from, $from_offset, $from_len)); |
|
|
|
|
237
|
|
|
} |
238
|
|
|
} |
239
|
|
|
} |
240
|
|
|
|
241
|
|
|
public $edits = []; |
242
|
|
|
} |
243
|
|
|
|
244
|
|
|
/** |
245
|
|
|
* FineDiff class. |
246
|
|
|
* |
247
|
|
|
* TODO: Document |
248
|
|
|
*/ |
249
|
|
|
class FineDiff |
250
|
|
|
{ |
251
|
|
|
/**------------------------------------------------------------------------ |
252
|
|
|
* |
253
|
|
|
* Public section |
254
|
|
|
* |
255
|
|
|
*/ |
256
|
|
|
|
257
|
|
|
/** |
258
|
|
|
* Constructor |
259
|
|
|
* ... |
260
|
|
|
* The $granularityStack allows FineDiff to be configurable so that |
261
|
|
|
* a particular stack tailored to the specific content of a document can |
262
|
|
|
* be passed. |
263
|
|
|
*/ |
264
|
|
|
public function __construct($from_text = '', $to_text = '', $granularityStack = null) |
265
|
|
|
{ |
266
|
|
|
// setup stack for generic text documents by default |
267
|
|
|
$this->granularityStack = $granularityStack ? $granularityStack : self::$characterGranularity; |
|
|
|
|
268
|
|
|
$this->edits = []; |
|
|
|
|
269
|
|
|
$this->from_text = $from_text; |
|
|
|
|
270
|
|
|
$this->doDiff($from_text, $to_text); |
271
|
|
|
} |
272
|
|
|
|
273
|
|
|
public function getOps() |
274
|
|
|
{ |
275
|
|
|
return $this->edits; |
276
|
|
|
} |
277
|
|
|
|
278
|
|
|
public function getOpcodes() |
279
|
|
|
{ |
280
|
|
|
$opcodes = []; |
281
|
|
|
foreach ($this->edits as $edit) { |
282
|
|
|
$opcodes[] = $edit->getOpcode(); |
283
|
|
|
} |
284
|
|
|
|
285
|
|
|
return implode('', $opcodes); |
286
|
|
|
} |
287
|
|
|
|
288
|
|
|
public function renderDiffToHTML() |
289
|
|
|
{ |
290
|
|
|
$in_offset = 0; |
291
|
|
|
ob_start(); |
292
|
|
|
foreach ($this->edits as $edit) { |
293
|
|
|
$n = $edit->getFromLen(); |
294
|
|
|
if ($edit instanceof FineDiffCopyOp) { |
295
|
|
|
self::renderDiffToHTMLFromOpcode('c', $this->from_text, $in_offset, $n); |
296
|
|
|
} else { |
297
|
|
|
if ($edit instanceof FineDiffDeleteOp) { |
298
|
|
|
self::renderDiffToHTMLFromOpcode('d', $this->from_text, $in_offset, $n); |
299
|
|
|
} else { |
300
|
|
|
if ($edit instanceof FineDiffInsertOp) { |
301
|
|
|
self::renderDiffToHTMLFromOpcode('i', $edit->getText(), 0, $edit->getToLen()); |
302
|
|
|
} else /* if ( $edit instanceof FineDiffReplaceOp ) */ { |
303
|
|
|
self::renderDiffToHTMLFromOpcode('d', $this->from_text, $in_offset, $n); |
304
|
|
|
self::renderDiffToHTMLFromOpcode('i', $edit->getText(), 0, $edit->getToLen()); |
305
|
|
|
} |
306
|
|
|
} |
307
|
|
|
} |
308
|
|
|
$in_offset += $n; |
309
|
|
|
} |
310
|
|
|
|
311
|
|
|
return ob_get_clean(); |
312
|
|
|
} |
313
|
|
|
|
314
|
|
|
/**------------------------------------------------------------------------ |
315
|
|
|
* Return an opcodes string describing the diff between a "From" and a |
316
|
|
|
* "To" string |
317
|
|
|
*/ |
318
|
|
|
public static function getDiffOpcodes($from, $to, $granularities = null) |
319
|
|
|
{ |
320
|
|
|
$diff = new self($from, $to, $granularities); |
321
|
|
|
|
322
|
|
|
return $diff->getOpcodes(); |
323
|
|
|
} |
324
|
|
|
|
325
|
|
|
/**------------------------------------------------------------------------ |
326
|
|
|
* Return an iterable collection of diff ops from an opcodes string |
327
|
|
|
*/ |
328
|
|
|
public static function getDiffOpsFromOpcodes($opcodes) |
329
|
|
|
{ |
330
|
|
|
$diffops = new FineDiffOps(); |
331
|
|
|
self::renderFromOpcodes(null, $opcodes, [$diffops, 'appendOpcode']); |
332
|
|
|
|
333
|
|
|
return $diffops->edits; |
334
|
|
|
} |
335
|
|
|
|
336
|
|
|
/**------------------------------------------------------------------------ |
337
|
|
|
* Re-create the "To" string from the "From" string and an "Opcodes" string |
338
|
|
|
*/ |
339
|
|
|
public static function renderToTextFromOpcodes($from, $opcodes) |
340
|
|
|
{ |
341
|
|
|
ob_start(); |
342
|
|
|
self::renderFromOpcodes($from, $opcodes, ['FineDiff', 'renderToTextFromOpcode']); |
343
|
|
|
|
344
|
|
|
return ob_get_clean(); |
345
|
|
|
} |
346
|
|
|
|
347
|
|
|
/**------------------------------------------------------------------------ |
348
|
|
|
* Render the diff to an HTML string |
349
|
|
|
*/ |
350
|
|
|
public static function renderDiffToHTMLFromOpcodes($from, $opcodes) |
351
|
|
|
{ |
352
|
|
|
ob_start(); |
353
|
|
|
self::renderFromOpcodes($from, $opcodes, ['FineDiff', 'renderDiffToHTMLFromOpcode']); |
354
|
|
|
|
355
|
|
|
return ob_get_clean(); |
356
|
|
|
} |
357
|
|
|
|
358
|
|
|
/**------------------------------------------------------------------------ |
359
|
|
|
* Generic opcodes parser, user must supply callback for handling |
360
|
|
|
* single opcode |
361
|
|
|
*/ |
362
|
|
|
public static function renderFromOpcodes($from, $opcodes, $callback) |
363
|
|
|
{ |
364
|
|
|
if (!is_callable($callback)) { |
365
|
|
|
return; |
366
|
|
|
} |
367
|
|
|
$opcodes_len = strlen($opcodes); |
368
|
|
|
$from_offset = $opcodes_offset = 0; |
369
|
|
|
while ($opcodes_offset < $opcodes_len) { |
370
|
|
|
$opcode = substr($opcodes, $opcodes_offset, 1); |
371
|
|
|
$opcodes_offset++; |
372
|
|
|
$n = intval(substr($opcodes, $opcodes_offset)); |
373
|
|
|
if ($n) { |
374
|
|
|
$opcodes_offset += strlen(strval($n)); |
375
|
|
|
} else { |
376
|
|
|
$n = 1; |
377
|
|
|
} |
378
|
|
|
if ($opcode === 'c') { // copy n characters from source |
379
|
|
|
call_user_func($callback, 'c', $from, $from_offset, $n, ''); |
380
|
|
|
$from_offset += $n; |
381
|
|
|
} else { |
382
|
|
|
if ($opcode === 'd') { // delete n characters from source |
383
|
|
|
call_user_func($callback, 'd', $from, $from_offset, $n, ''); |
384
|
|
|
$from_offset += $n; |
385
|
|
|
} else /* if ( $opcode === 'i' ) */ { // insert n characters from opcodes |
386
|
|
|
call_user_func($callback, 'i', $opcodes, $opcodes_offset + 1, $n); |
387
|
|
|
$opcodes_offset += 1 + $n; |
388
|
|
|
} |
389
|
|
|
} |
390
|
|
|
} |
391
|
|
|
} |
392
|
|
|
|
393
|
|
|
/** |
394
|
|
|
* Stock granularity stacks and delimiters. |
395
|
|
|
*/ |
396
|
|
|
const paragraphDelimiters = "\n\r"; |
397
|
|
|
public static $paragraphGranularity = [ |
398
|
|
|
self::paragraphDelimiters, |
399
|
|
|
]; |
400
|
|
|
const sentenceDelimiters = ".\n\r"; |
401
|
|
|
public static $sentenceGranularity = [ |
402
|
|
|
self::paragraphDelimiters, |
403
|
|
|
self::sentenceDelimiters, |
404
|
|
|
]; |
405
|
|
|
const wordDelimiters = " \t.\n\r"; |
406
|
|
|
public static $wordGranularity = [ |
407
|
|
|
self::paragraphDelimiters, |
408
|
|
|
self::sentenceDelimiters, |
409
|
|
|
self::wordDelimiters, |
410
|
|
|
]; |
411
|
|
|
const characterDelimiters = ''; |
412
|
|
|
public static $characterGranularity = [ |
413
|
|
|
self::paragraphDelimiters, |
414
|
|
|
self::sentenceDelimiters, |
415
|
|
|
self::wordDelimiters, |
416
|
|
|
self::characterDelimiters, |
417
|
|
|
]; |
418
|
|
|
|
419
|
|
|
public static $textStack = [ |
420
|
|
|
'.', |
421
|
|
|
" \t.\n\r", |
422
|
|
|
'', |
423
|
|
|
]; |
424
|
|
|
|
425
|
|
|
/**------------------------------------------------------------------------ |
426
|
|
|
* |
427
|
|
|
* Private section |
428
|
|
|
* |
429
|
|
|
*/ |
430
|
|
|
|
431
|
|
|
/** |
432
|
|
|
* Entry point to compute the diff. |
433
|
|
|
*/ |
434
|
|
|
private function doDiff($from_text, $to_text) |
435
|
|
|
{ |
436
|
|
|
$this->last_edit = false; |
|
|
|
|
437
|
|
|
$this->stackpointer = 0; |
|
|
|
|
438
|
|
|
$this->from_text = $from_text; |
439
|
|
|
$this->from_offset = 0; |
|
|
|
|
440
|
|
|
// can't diff without at least one granularity specifier |
441
|
|
|
if (empty($this->granularityStack)) { |
442
|
|
|
return; |
443
|
|
|
} |
444
|
|
|
$this->_processGranularity($from_text, $to_text); |
445
|
|
|
} |
446
|
|
|
|
447
|
|
|
/** |
448
|
|
|
* This is the recursive function which is responsible for |
449
|
|
|
* handling/increasing granularity. |
450
|
|
|
* |
451
|
|
|
* Incrementally increasing the granularity is key to compute the |
452
|
|
|
* overall diff in a very efficient way. |
453
|
|
|
*/ |
454
|
|
|
private function _processGranularity($from_segment, $to_segment) |
455
|
|
|
{ |
456
|
|
|
$delimiters = $this->granularityStack[$this->stackpointer++]; |
457
|
|
|
$has_next_stage = $this->stackpointer < count($this->granularityStack); |
458
|
|
|
foreach (self::doFragmentDiff($from_segment, $to_segment, $delimiters) as $fragment_edit) { |
459
|
|
|
// increase granularity |
460
|
|
|
if ($fragment_edit instanceof FineDiffReplaceOp && $has_next_stage) { |
461
|
|
|
$this->_processGranularity( |
462
|
|
|
substr($this->from_text, $this->from_offset, $fragment_edit->getFromLen()), |
463
|
|
|
$fragment_edit->getText() |
464
|
|
|
); |
465
|
|
|
} // fuse copy ops whenever possible |
466
|
|
|
else { |
467
|
|
|
if ($fragment_edit instanceof FineDiffCopyOp && $this->last_edit instanceof FineDiffCopyOp) { |
468
|
|
|
$this->edits[count($this->edits) - 1]->increase($fragment_edit->getFromLen()); |
469
|
|
|
$this->from_offset += $fragment_edit->getFromLen(); |
470
|
|
|
} else { |
471
|
|
|
/* $fragment_edit instanceof FineDiffCopyOp */ |
472
|
|
|
/* $fragment_edit instanceof FineDiffDeleteOp */ |
473
|
|
|
/* $fragment_edit instanceof FineDiffInsertOp */ |
474
|
|
|
$this->edits[] = $this->last_edit = $fragment_edit; |
475
|
|
|
$this->from_offset += $fragment_edit->getFromLen(); |
476
|
|
|
} |
477
|
|
|
} |
478
|
|
|
} |
479
|
|
|
$this->stackpointer--; |
480
|
|
|
} |
481
|
|
|
|
482
|
|
|
/** |
483
|
|
|
* This is the core algorithm which actually perform the diff itself, |
484
|
|
|
* fragmenting the strings as per specified delimiters. |
485
|
|
|
* |
486
|
|
|
* This function is naturally recursive, however for performance purpose |
487
|
|
|
* a local job queue is used instead of outright recursivity. |
488
|
|
|
*/ |
489
|
|
|
private static function doFragmentDiff($from_text, $to_text, $delimiters) |
490
|
|
|
{ |
491
|
|
|
// Empty delimiter means character-level diffing. |
492
|
|
|
// In such case, use code path optimized for character-level |
493
|
|
|
// diffing. |
494
|
|
|
if (empty($delimiters)) { |
495
|
|
|
return self::doCharDiff($from_text, $to_text); |
496
|
|
|
} |
497
|
|
|
|
498
|
|
|
$result = []; |
499
|
|
|
|
500
|
|
|
// fragment-level diffing |
501
|
|
|
$from_text_len = strlen($from_text); |
502
|
|
|
$to_text_len = strlen($to_text); |
503
|
|
|
$from_fragments = self::extractFragments($from_text, $delimiters); |
504
|
|
|
$to_fragments = self::extractFragments($to_text, $delimiters); |
505
|
|
|
|
506
|
|
|
$jobs = [[0, $from_text_len, 0, $to_text_len]]; |
507
|
|
|
|
508
|
|
|
$cached_array_keys = []; |
509
|
|
|
|
510
|
|
|
while ($job = array_pop($jobs)) { |
511
|
|
|
|
512
|
|
|
// get the segments which must be diff'ed |
513
|
|
|
list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job; |
514
|
|
|
|
515
|
|
|
// catch easy cases first |
516
|
|
|
$from_segment_length = $from_segment_end - $from_segment_start; |
517
|
|
|
$to_segment_length = $to_segment_end - $to_segment_start; |
518
|
|
View Code Duplication |
if (!$from_segment_length || !$to_segment_length) { |
|
|
|
|
519
|
|
|
if ($from_segment_length) { |
520
|
|
|
$result[$from_segment_start * 4] = new FineDiffDeleteOp($from_segment_length); |
521
|
|
|
} else { |
522
|
|
|
if ($to_segment_length) { |
523
|
|
|
$result[$from_segment_start * 4 + 1] = new FineDiffInsertOp(substr($to_text, $to_segment_start, |
524
|
|
|
$to_segment_length)); |
525
|
|
|
} |
526
|
|
|
} |
527
|
|
|
continue; |
528
|
|
|
} |
529
|
|
|
|
530
|
|
|
// find longest copy operation for the current segments |
531
|
|
|
$best_copy_length = 0; |
532
|
|
|
|
533
|
|
|
$from_base_fragment_index = $from_segment_start; |
534
|
|
|
|
535
|
|
|
$cached_array_keys_for_current_segment = []; |
536
|
|
|
|
537
|
|
|
while ($from_base_fragment_index < $from_segment_end) { |
538
|
|
|
$from_base_fragment = $from_fragments[$from_base_fragment_index]; |
539
|
|
|
$from_base_fragment_length = strlen($from_base_fragment); |
540
|
|
|
// performance boost: cache array keys |
541
|
|
|
if (!isset($cached_array_keys_for_current_segment[$from_base_fragment])) { |
542
|
|
|
if (!isset($cached_array_keys[$from_base_fragment])) { |
543
|
|
|
$to_all_fragment_indices = $cached_array_keys[$from_base_fragment] = array_keys($to_fragments, |
544
|
|
|
$from_base_fragment, true); |
545
|
|
|
} else { |
546
|
|
|
$to_all_fragment_indices = $cached_array_keys[$from_base_fragment]; |
547
|
|
|
} |
548
|
|
|
// get only indices which falls within current segment |
549
|
|
|
if ($to_segment_start > 0 || $to_segment_end < $to_text_len) { |
550
|
|
|
$to_fragment_indices = []; |
551
|
|
|
foreach ($to_all_fragment_indices as $to_fragment_index) { |
552
|
|
|
if ($to_fragment_index < $to_segment_start) { |
553
|
|
|
continue; |
554
|
|
|
} |
555
|
|
|
if ($to_fragment_index >= $to_segment_end) { |
556
|
|
|
break; |
557
|
|
|
} |
558
|
|
|
$to_fragment_indices[] = $to_fragment_index; |
559
|
|
|
} |
560
|
|
|
$cached_array_keys_for_current_segment[$from_base_fragment] = $to_fragment_indices; |
561
|
|
|
} else { |
562
|
|
|
$to_fragment_indices = $to_all_fragment_indices; |
563
|
|
|
} |
564
|
|
|
} else { |
565
|
|
|
$to_fragment_indices = $cached_array_keys_for_current_segment[$from_base_fragment]; |
566
|
|
|
} |
567
|
|
|
// iterate through collected indices |
568
|
|
|
foreach ($to_fragment_indices as $to_base_fragment_index) { |
569
|
|
|
$fragment_index_offset = $from_base_fragment_length; |
570
|
|
|
// iterate until no more match |
571
|
|
|
for (; ;) { |
572
|
|
|
$fragment_from_index = $from_base_fragment_index + $fragment_index_offset; |
573
|
|
|
if ($fragment_from_index >= $from_segment_end) { |
574
|
|
|
break; |
575
|
|
|
} |
576
|
|
|
$fragment_to_index = $to_base_fragment_index + $fragment_index_offset; |
577
|
|
|
if ($fragment_to_index >= $to_segment_end) { |
578
|
|
|
break; |
579
|
|
|
} |
580
|
|
|
if ($from_fragments[$fragment_from_index] !== $to_fragments[$fragment_to_index]) { |
581
|
|
|
break; |
582
|
|
|
} |
583
|
|
|
$fragment_length = strlen($from_fragments[$fragment_from_index]); |
584
|
|
|
$fragment_index_offset += $fragment_length; |
585
|
|
|
} |
586
|
|
|
if ($fragment_index_offset > $best_copy_length) { |
587
|
|
|
$best_copy_length = $fragment_index_offset; |
588
|
|
|
$best_from_start = $from_base_fragment_index; |
589
|
|
|
$best_to_start = $to_base_fragment_index; |
590
|
|
|
} |
591
|
|
|
} |
592
|
|
|
$from_base_fragment_index += strlen($from_base_fragment); |
593
|
|
|
// If match is larger than half segment size, no point trying to find better |
594
|
|
|
// TODO: Really? |
595
|
|
|
if ($best_copy_length >= $from_segment_length / 2) { |
596
|
|
|
break; |
597
|
|
|
} |
598
|
|
|
// no point to keep looking if what is left is less than |
599
|
|
|
// current best match |
600
|
|
|
if ($from_base_fragment_index + $best_copy_length >= $from_segment_end) { |
601
|
|
|
break; |
602
|
|
|
} |
603
|
|
|
} |
604
|
|
|
|
605
|
|
View Code Duplication |
if ($best_copy_length) { |
|
|
|
|
606
|
|
|
$jobs[] = [ |
607
|
|
|
$from_segment_start, |
608
|
|
|
$best_from_start, |
|
|
|
|
609
|
|
|
$to_segment_start, |
610
|
|
|
$best_to_start, |
|
|
|
|
611
|
|
|
]; |
612
|
|
|
$result[$best_from_start * 4 + 2] = new FineDiffCopyOp($best_copy_length); |
613
|
|
|
$jobs[] = [ |
614
|
|
|
$best_from_start + $best_copy_length, |
615
|
|
|
$from_segment_end, |
616
|
|
|
$best_to_start + $best_copy_length, |
617
|
|
|
$to_segment_end, |
618
|
|
|
]; |
619
|
|
|
} else { |
620
|
|
|
$result[$from_segment_start * 4] = new FineDiffReplaceOp($from_segment_length, |
621
|
|
|
substr($to_text, $to_segment_start, $to_segment_length)); |
622
|
|
|
} |
623
|
|
|
} |
624
|
|
|
|
625
|
|
|
ksort($result, SORT_NUMERIC); |
626
|
|
|
|
627
|
|
|
return array_values($result); |
628
|
|
|
} |
629
|
|
|
|
630
|
|
|
/** |
631
|
|
|
* Perform a character-level diff. |
632
|
|
|
* |
633
|
|
|
* The algorithm is quite similar to doFragmentDiff(), except that |
634
|
|
|
* the code path is optimized for character-level diff -- strpos() is |
635
|
|
|
* used to find out the longest common subequence of characters. |
636
|
|
|
* |
637
|
|
|
* We try to find a match using the longest possible subsequence, which |
638
|
|
|
* is at most the length of the shortest of the two strings, then incrementally |
639
|
|
|
* reduce the size until a match is found. |
640
|
|
|
* |
641
|
|
|
* I still need to study more the performance of this function. It |
642
|
|
|
* appears that for long strings, the generic doFragmentDiff() is more |
643
|
|
|
* performant. For word-sized strings, doCharDiff() is somewhat more |
644
|
|
|
* performant. |
645
|
|
|
*/ |
646
|
|
|
private static function doCharDiff($from_text, $to_text) |
647
|
|
|
{ |
648
|
|
|
$result = []; |
649
|
|
|
$jobs = [[0, strlen($from_text), 0, strlen($to_text)]]; |
650
|
|
|
while ($job = array_pop($jobs)) { |
651
|
|
|
// get the segments which must be diff'ed |
652
|
|
|
list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job; |
653
|
|
|
$from_segment_len = $from_segment_end - $from_segment_start; |
654
|
|
|
$to_segment_len = $to_segment_end - $to_segment_start; |
655
|
|
|
|
656
|
|
|
// catch easy cases first |
657
|
|
View Code Duplication |
if (!$from_segment_len || !$to_segment_len) { |
|
|
|
|
658
|
|
|
if ($from_segment_len) { |
659
|
|
|
$result[$from_segment_start * 4 + 0] = new FineDiffDeleteOp($from_segment_len); |
660
|
|
|
} else { |
661
|
|
|
if ($to_segment_len) { |
662
|
|
|
$result[$from_segment_start * 4 + 1] = new FineDiffInsertOp(substr($to_text, $to_segment_start, |
663
|
|
|
$to_segment_len)); |
664
|
|
|
} |
665
|
|
|
} |
666
|
|
|
continue; |
667
|
|
|
} |
668
|
|
|
if ($from_segment_len >= $to_segment_len) { |
669
|
|
|
$copy_len = $to_segment_len; |
670
|
|
View Code Duplication |
while ($copy_len) { |
|
|
|
|
671
|
|
|
$to_copy_start = $to_segment_start; |
672
|
|
|
$to_copy_start_max = $to_segment_end - $copy_len; |
673
|
|
|
while ($to_copy_start <= $to_copy_start_max) { |
674
|
|
|
$from_copy_start = strpos(substr($from_text, $from_segment_start, $from_segment_len), |
675
|
|
|
substr($to_text, $to_copy_start, $copy_len)); |
676
|
|
|
if ($from_copy_start !== false) { |
677
|
|
|
$from_copy_start += $from_segment_start; |
678
|
|
|
break 2; |
679
|
|
|
} |
680
|
|
|
$to_copy_start++; |
681
|
|
|
} |
682
|
|
|
$copy_len--; |
683
|
|
|
} |
684
|
|
|
} else { |
685
|
|
|
$copy_len = $from_segment_len; |
686
|
|
View Code Duplication |
while ($copy_len) { |
|
|
|
|
687
|
|
|
$from_copy_start = $from_segment_start; |
688
|
|
|
$from_copy_start_max = $from_segment_end - $copy_len; |
689
|
|
|
while ($from_copy_start <= $from_copy_start_max) { |
690
|
|
|
$to_copy_start = strpos(substr($to_text, $to_segment_start, $to_segment_len), |
691
|
|
|
substr($from_text, $from_copy_start, $copy_len)); |
692
|
|
|
if ($to_copy_start !== false) { |
693
|
|
|
$to_copy_start += $to_segment_start; |
694
|
|
|
break 2; |
695
|
|
|
} |
696
|
|
|
$from_copy_start++; |
697
|
|
|
} |
698
|
|
|
$copy_len--; |
699
|
|
|
} |
700
|
|
|
} |
701
|
|
|
// match found |
702
|
|
View Code Duplication |
if ($copy_len) { |
|
|
|
|
703
|
|
|
$jobs[] = [ |
704
|
|
|
$from_segment_start, |
705
|
|
|
$from_copy_start, |
|
|
|
|
706
|
|
|
$to_segment_start, |
707
|
|
|
$to_copy_start, |
|
|
|
|
708
|
|
|
]; |
709
|
|
|
$result[$from_copy_start * 4 + 2] = new FineDiffCopyOp($copy_len); |
710
|
|
|
$jobs[] = [ |
711
|
|
|
$from_copy_start + $copy_len, |
712
|
|
|
$from_segment_end, |
713
|
|
|
$to_copy_start + $copy_len, |
714
|
|
|
$to_segment_end, |
715
|
|
|
]; |
716
|
|
|
} // no match, so delete all, insert all |
717
|
|
|
else { |
718
|
|
|
$result[$from_segment_start * 4] = new FineDiffReplaceOp($from_segment_len, |
719
|
|
|
substr($to_text, $to_segment_start, $to_segment_len)); |
720
|
|
|
} |
721
|
|
|
} |
722
|
|
|
ksort($result, SORT_NUMERIC); |
723
|
|
|
|
724
|
|
|
return array_values($result); |
725
|
|
|
} |
726
|
|
|
|
727
|
|
|
/** |
728
|
|
|
* Efficiently fragment the text into an array according to |
729
|
|
|
* specified delimiters. |
730
|
|
|
* No delimiters means fragment into single character. |
731
|
|
|
* The array indices are the offset of the fragments into |
732
|
|
|
* the input string. |
733
|
|
|
* A sentinel empty fragment is always added at the end. |
734
|
|
|
* Careful: No check is performed as to the validity of the |
735
|
|
|
* delimiters. |
736
|
|
|
*/ |
737
|
|
|
private static function extractFragments($text, $delimiters) |
738
|
|
|
{ |
739
|
|
|
// special case: split into characters |
740
|
|
|
if (empty($delimiters)) { |
741
|
|
|
$chars = str_split($text, 1); |
742
|
|
|
$chars[strlen($text)] = ''; |
743
|
|
|
|
744
|
|
|
return $chars; |
745
|
|
|
} |
746
|
|
|
$fragments = []; |
747
|
|
|
$start = $end = 0; |
748
|
|
|
for (; ;) { |
749
|
|
|
$end += strcspn($text, $delimiters, $end); |
750
|
|
|
$end += strspn($text, $delimiters, $end); |
751
|
|
|
if ($end === $start) { |
752
|
|
|
break; |
753
|
|
|
} |
754
|
|
|
$fragments[$start] = substr($text, $start, $end - $start); |
755
|
|
|
$start = $end; |
756
|
|
|
} |
757
|
|
|
$fragments[$start] = ''; |
758
|
|
|
|
759
|
|
|
return $fragments; |
760
|
|
|
} |
761
|
|
|
|
762
|
|
|
/** |
763
|
|
|
* Stock opcode renderers. |
764
|
|
|
*/ |
765
|
|
|
private static function renderToTextFromOpcode($opcode, $from, $from_offset, $from_len) |
766
|
|
|
{ |
767
|
|
|
if ($opcode === 'c' || $opcode === 'i') { |
768
|
|
|
echo substr($from, $from_offset, $from_len); |
769
|
|
|
} |
770
|
|
|
} |
771
|
|
|
|
772
|
|
|
private static function renderDiffToHTMLFromOpcode($opcode, $from, $from_offset, $from_len) |
773
|
|
|
{ |
774
|
|
|
if ($opcode === 'c') { |
775
|
|
|
echo htmlentities(substr($from, $from_offset, $from_len)); |
776
|
|
|
} else { |
777
|
|
|
if ($opcode === 'd') { |
778
|
|
|
$deletion = substr($from, $from_offset, $from_len); |
779
|
|
|
if (strcspn($deletion, " \n\r") === 0) { |
780
|
|
|
$deletion = str_replace(["\n", "\r"], ['\n', '\r'], $deletion); |
781
|
|
|
} |
782
|
|
|
echo '<del>', htmlentities($deletion), '</del>'; |
783
|
|
|
} else /* if ( $opcode === 'i' ) */ { |
784
|
|
|
echo '<ins>', htmlentities(substr($from, $from_offset, $from_len)), '</ins>'; |
785
|
|
|
} |
786
|
|
|
} |
787
|
|
|
} |
788
|
|
|
} |
789
|
|
|
|
In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:
Generally, it is a good practice to explictly declare properties to avoid accidental typos and provide IDE auto-completion: