1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Caxy\HtmlDiff\Table; |
4
|
|
|
|
5
|
|
|
use Caxy\HtmlDiff\AbstractDiff; |
6
|
|
|
use Caxy\HtmlDiff\HtmlDiff; |
7
|
|
|
use Caxy\HtmlDiff\HtmlDiffConfig; |
8
|
|
|
use Caxy\HtmlDiff\Operation; |
9
|
|
|
|
10
|
|
|
/** |
11
|
|
|
* Class TableDiff |
12
|
|
|
* @package Caxy\HtmlDiff\Table |
13
|
|
|
*/ |
14
|
|
|
class TableDiff extends AbstractDiff |
15
|
|
|
{ |
16
|
|
|
/** |
17
|
|
|
* @var null|Table |
18
|
|
|
*/ |
19
|
|
|
protected $oldTable = null; |
20
|
|
|
|
21
|
|
|
/** |
22
|
|
|
* @var null|Table |
23
|
|
|
*/ |
24
|
|
|
protected $newTable = null; |
25
|
|
|
|
26
|
|
|
/** |
27
|
|
|
* @var null|\DOMElement |
28
|
|
|
*/ |
29
|
|
|
protected $diffTable = null; |
30
|
|
|
|
31
|
|
|
/** |
32
|
|
|
* @var null|\DOMDocument |
33
|
|
|
*/ |
34
|
|
|
protected $diffDom = null; |
35
|
|
|
|
36
|
|
|
/** |
37
|
|
|
* @var int |
38
|
|
|
*/ |
39
|
|
|
protected $newRowOffsets = 0; |
40
|
|
|
|
41
|
|
|
/** |
42
|
|
|
* @var int |
43
|
|
|
*/ |
44
|
|
|
protected $oldRowOffsets = 0; |
45
|
|
|
|
46
|
|
|
/** |
47
|
|
|
* @var array |
48
|
|
|
*/ |
49
|
|
|
protected $cellValues = array(); |
50
|
|
|
|
51
|
|
|
/** |
52
|
|
|
* @var \HTMLPurifier |
53
|
|
|
*/ |
54
|
|
|
protected $purifier; |
55
|
|
|
|
56
|
|
|
/**
 * Factory method: instantiates a TableDiff for the two given texts and applies
 * the supplied configuration when one is passed.
 *
 * @param string              $oldText
 * @param string              $newText
 * @param HtmlDiffConfig|null $config
 *
 * @return self
 */
public static function create($oldText, $newText, HtmlDiffConfig $config = null)
{
    $instance = new self($oldText, $newText);

    // Without an explicit config the freshly constructed instance is returned untouched.
    if ($config === null) {
        return $instance;
    }

    $instance->setConfig($config);

    return $instance;
}
73
|
|
|
|
74
|
|
|
/**
 * TableDiff constructor.
 *
 * Forwards every argument to the parent constructor, then creates the
 * HTMLPurifier instance used by this class from HTMLPurifier's default config.
 *
 * @param string     $oldText
 * @param string     $newText
 * @param string     $encoding
 * @param array|null $specialCaseTags
 * @param bool|null  $groupDiffs
 */
public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
{
    parent::__construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs);

    $purifierConfig = \HTMLPurifier_Config::createDefault();
    $this->purifier = new \HTMLPurifier($purifierConfig);
}
89
|
|
|
|
90
|
|
|
/**
 * Builds the table diff and returns it as HTML.
 *
 * When a diff cache is available and already holds an entry for the current
 * old/new text pair, that entry is returned directly. Otherwise the diff is
 * computed and, if a cache is available, stored in it.
 *
 * @return string
 */
public function build()
{
    $isCached = $this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText);
    if ($isCached) {
        $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);

        return $this->content;
    }

    $this->buildTableDoms();

    $this->diffDom = new \DOMDocument();

    $this->indexCellValues($this->newTable);

    $this->diffTableContent();

    // Nothing to persist when no cache is configured.
    if (!$this->hasDiffCache()) {
        return $this->content;
    }

    $this->getDiffCache()->save($this->oldText, $this->newText, $this->content);

    return $this->content;
}
115
|
|
|
|
116
|
|
|
/**
 * Diffs the rows of the old and new table and stores the resulting HTML in $this->content.
 *
 * A match score is computed for every (old row, new row) pair; the scores are
 * then used to pair rows up before the rows themselves are diffed.
 */
protected function diffTableContent()
{
    $this->diffDom = new \DOMDocument();
    $this->diffTable = $this->newTable->cloneNode($this->diffDom);
    $this->diffDom->appendChild($this->diffTable);

    $rowsBefore = $this->oldTable->getRows();
    $rowsAfter = $this->newTable->getRows();

    // The same scores indexed both ways: [oldIndex][newIndex] and [newIndex][oldIndex].
    $scoresByOld = array();
    $scoresByNew = array();

    /* @var $oldRow TableRow */
    foreach ($rowsBefore as $oldIndex => $oldRow) {
        $scoresByOld[$oldIndex] = array();

        /* @var $newRow TableRow */
        foreach ($rowsAfter as $newIndex => $newRow) {
            $score = $this->getMatchPercentage($oldRow, $newRow, $oldIndex, $newIndex);

            $scoresByOld[$oldIndex][$newIndex] = $score;
            // Creates the inner array on first write for this new-row index.
            $scoresByNew[$newIndex][$oldIndex] = $score;
        }
    }

    $rowMatches = $this->getRowMatches($scoresByOld, $scoresByNew);
    $this->diffTableRowsWithMatches($rowsBefore, $rowsAfter, $rowMatches);

    $this->content = $this->htmlFromNode($this->diffTable);
}
152
|
|
|
|
153
|
|
|
/**
 * Converts the row matches into an ordered list of equal/insert/delete/replace
 * operations and then applies each operation in turn.
 *
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param RowMatch[] $matches
 */
protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
{
    $oldCursor = 0;
    $newCursor = 0;

    // Sentinel match starting at the row counts, so rows left over after the
    // last real match still produce a gap operation below.
    $oldTotal = count($oldRows);
    $newTotal = count($newRows);
    $matches[] = new RowMatch($newTotal, $oldTotal, $newTotal, $oldTotal);

    // build operations
    $operations = array();
    foreach ($matches as $match) {
        $oldAligned = $oldCursor === $match->getStartInOld();
        $newAligned = $newCursor === $match->getStartInNew();

        // Classify the gap between the cursors and the start of this match.
        if ($oldAligned && $newAligned) {
            $gapAction = 'equal';
        } elseif ($oldAligned) {
            $gapAction = 'insert';
        } elseif ($newAligned) {
            $gapAction = 'delete';
        } else {
            $gapAction = 'replace';
        }

        if ($gapAction !== 'equal') {
            $operations[] = new Operation(
                $gapAction,
                $oldCursor,
                $match->getStartInOld(),
                $newCursor,
                $match->getStartInNew()
            );
        }

        $operations[] = new Operation(
            'equal',
            $match->getStartInOld(),
            $match->getEndInOld(),
            $match->getStartInNew(),
            $match->getEndInNew()
        );

        $oldCursor = $match->getEndInOld();
        $newCursor = $match->getEndInNew();
    }

    $appliedRowSpans = array();

    // process operations
    foreach ($operations as $operation) {
        $action = $operation->action;

        if ($action === 'equal') {
            $this->processEqualOperation($operation, $oldRows, $newRows, $appliedRowSpans);
        } elseif ($action === 'delete') {
            $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans);
        } elseif ($action === 'insert') {
            $this->processInsertOperation($operation, $newRows, $appliedRowSpans);
        } elseif ($action === 'replace') {
            $this->processReplaceOperation($operation, $oldRows, $newRows, $appliedRowSpans);
        }
    }
}
230
|
|
|
|
231
|
|
|
/**
 * Appends every new row covered by the operation to the diff table as an
 * inserted row (no old counterpart).
 *
 * @param Operation $operation
 * @param array     $newRows
 * @param array     $appliedRowSpans
 * @param bool      $forceExpansion
 */
protected function processInsertOperation(
    Operation $operation,
    $newRows,
    &$appliedRowSpans,
    $forceExpansion = false
) {
    $length = $operation->endInNew - $operation->startInNew;
    $insertedRows = array_slice($newRows, $operation->startInNew, $length);

    foreach ($insertedRows as $insertedRow) {
        $this->diffAndAppendRows(null, $insertedRow, $appliedRowSpans, $forceExpansion);
    }
}
248
|
|
|
|
249
|
|
|
/**
 * Appends every old row covered by the operation to the diff table as a
 * deleted row (no new counterpart).
 *
 * @param Operation $operation
 * @param array     $oldRows
 * @param array     $appliedRowSpans
 * @param bool      $forceExpansion
 */
protected function processDeleteOperation(
    Operation $operation,
    $oldRows,
    &$appliedRowSpans,
    $forceExpansion = false
) {
    $length = $operation->endInOld - $operation->startInOld;
    $deletedRows = array_slice($oldRows, $operation->startInOld, $length);

    foreach ($deletedRows as $deletedRow) {
        $this->diffAndAppendRows($deletedRow, null, $appliedRowSpans, $forceExpansion);
    }
}
266
|
|
|
|
267
|
|
|
/**
 * Diffs the old and new rows covered by the operation pairwise, by position,
 * and appends the results to the diff table.
 *
 * @param Operation $operation
 * @param array     $oldRows
 * @param array     $newRows
 * @param array     $appliedRowSpans
 */
protected function processEqualOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $oldLength = $operation->endInOld - $operation->startInOld;
    $newLength = $operation->endInNew - $operation->startInNew;

    $oldSlice = array_values(array_slice($oldRows, $operation->startInOld, $oldLength));
    $newSlice = array_values(array_slice($newRows, $operation->startInNew, $newLength));

    // New rows without an old row at the same position are skipped, so only
    // the overlapping prefix of the two slices is processed.
    $pairCount = min(count($oldSlice), count($newSlice));
    for ($offset = 0; $offset < $pairCount; ++$offset) {
        $this->diffAndAppendRows($oldSlice[$offset], $newSlice[$offset], $appliedRowSpans);
    }
}
290
|
|
|
|
291
|
|
|
/**
 * Handles a replace operation as a delete of the old rows followed by an
 * insert of the new rows, both with forced expansion enabled.
 *
 * @param Operation $operation
 * @param array     $oldRows
 * @param array     $newRows
 * @param array     $appliedRowSpans
 */
protected function processReplaceOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $forceExpansion = true;

    // Old rows are emitted first so they precede their replacements in the output.
    $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans, $forceExpansion);
    $this->processInsertOperation($operation, $newRows, $appliedRowSpans, $forceExpansion);
}
302
|
|
|
|
303
|
|
|
/**
 * Collects row matches over the full range of old and new rows.
 *
 * $oldMatchData is only used for its element count; the scores are read from
 * $newMatchData.
 *
 * @param array $oldMatchData
 * @param array $newMatchData
 *
 * @return array
 */
protected function getRowMatches($oldMatchData, $newMatchData)
{
    $collected = array();

    $this->findRowMatches(
        $newMatchData,
        0,
        count($oldMatchData),
        0,
        count($newMatchData),
        $collected
    );

    return $collected;
}
322
|
|
|
|
323
|
|
|
/**
 * Recursively finds row matches inside the given old/new ranges.
 *
 * The best match in the range is located first; the ranges before and after
 * it are then searched recursively, and matches are appended to $matches in
 * positional order (before, match, after).
 *
 * @param array $newMatchData
 * @param int   $startInOld
 * @param int   $endInOld
 * @param int   $startInNew
 * @param int   $endInNew
 * @param array $matches
 */
protected function findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, &$matches)
{
    $pivot = $this->findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew);
    if ($pivot === null) {
        return;
    }

    // Search the region before the pivot only when it is non-empty on both sides.
    $hasRoomBefore = $startInOld < $pivot->getStartInOld() && $startInNew < $pivot->getStartInNew();
    if ($hasRoomBefore) {
        $this->findRowMatches(
            $newMatchData,
            $startInOld,
            $pivot->getStartInOld(),
            $startInNew,
            $pivot->getStartInNew(),
            $matches
        );
    }

    $matches[] = $pivot;

    // Likewise for the region after the pivot.
    $hasRoomAfter = $pivot->getEndInOld() < $endInOld && $pivot->getEndInNew() < $endInNew;
    if ($hasRoomAfter) {
        $this->findRowMatches(
            $newMatchData,
            $pivot->getEndInOld(),
            $endInOld,
            $pivot->getEndInNew(),
            $endInNew,
            $matches
        );
    }
}
364
|
|
|
|
365
|
|
|
/**
 * Finds the highest-scoring (old row, new row) pair inside the given ranges.
 *
 * Only scores strictly greater than 0 qualify, and on ties the first pair
 * encountered wins. The scan stops at the first index at or beyond the end of
 * a range, so it relies on the arrays being keyed in ascending order.
 *
 * @param array $newMatchData
 * @param int   $startInOld
 * @param int   $endInOld
 * @param int   $startInNew
 * @param int   $endInNew
 *
 * @return RowMatch|null
 */
protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew)
{
    $found = false;
    $winningOld = null;
    $winningNew = null;
    $winningPercentage = 0;

    foreach ($newMatchData as $newIndex => $oldMatches) {
        if ($newIndex < $startInNew) {
            continue;
        }

        if ($newIndex >= $endInNew) {
            break;
        }

        foreach ($oldMatches as $oldIndex => $percentage) {
            if ($oldIndex < $startInOld) {
                continue;
            }

            if ($oldIndex >= $endInOld) {
                break;
            }

            if ($percentage > $winningPercentage) {
                $found = true;
                $winningOld = $oldIndex;
                $winningNew = $newIndex;
                $winningPercentage = $percentage;
            }
        }
    }

    if (!$found) {
        return null;
    }

    // A match always spans exactly one row on each side.
    return new RowMatch(
        $winningNew,
        $winningOld,
        $winningNew + 1,
        $winningOld + 1,
        $winningPercentage
    );
}
419
|
|
|
|
420
|
|
|
/**
 * Builds the diff <tr> for a pair of rows; either row may be null, but not both.
 *
 * Cells are walked in parallel using a DiffRowPosition. When an old and a new
 * cell have different colspans, a second ("extra") row is created and the cells
 * are distributed between the two rows. Cells that were diffed normally get
 * their rowspan increased by one when an extra row exists.
 *
 * @param TableRow|null $oldRow
 * @param TableRow|null $newRow
 * @param array         $appliedRowSpans rowspan cells tracked across rows; updated in place
 * @param bool          $forceExpansion
 *
 * @return array two elements: the diff row and the extra row (null when none was needed)
 */
protected function diffRows($oldRow, $newRow, array &$appliedRowSpans, $forceExpansion = false)
{
    // create tr dom element
    $rowToClone = $newRow ?: $oldRow;
    /* @var $diffRow \DOMElement */
    $diffRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);

    $oldCells = $oldRow ? $oldRow->getCells() : array();
    $newCells = $newRow ? $newRow->getCells() : array();

    $position = new DiffRowPosition();

    $extraRow = null;

    /* @var $expandCells \DOMElement[] */
    $expandCells = array();
    /* @var $cellsWithMultipleRows \DOMElement[] */
    $cellsWithMultipleRows = array();

    $newCellCount = count($newCells);
    while ($position->getIndexInNew() < $newCellCount) {
        if (!$position->areColumnsEqual()) {
            $type = $position->getLesserColumnType();
            if ($type === 'new') {
                $row = $newRow;
                $targetRow = $extraRow;
            } else {
                $row = $oldRow;
                $targetRow = $diffRow;
            }

            // Fixed: this used to read `!$type === 'old'`, which PHP parses as
            // `(!$type) === 'old'` and is therefore always false. The intended
            // check is that the old side additionally needs a cell at its
            // current index, while the new side does not.
            if ($row && $targetRow && ($type !== 'old' || isset($oldCells[$position->getIndexInOld()]))) {
                $this->syncVirtualColumns($row, $position, $cellsWithMultipleRows, $targetRow, $type, true);

                continue;
            }
        }

        /* @var $newCell TableCell */
        $newCell = $newCells[$position->getIndexInNew()];
        /* @var $oldCell TableCell */
        $oldCell = isset($oldCells[$position->getIndexInOld()]) ? $oldCells[$position->getIndexInOld()] : null;

        if ($oldCell && $newCell->getColspan() != $oldCell->getColspan()) {
            // Colspans differ: the two cells cannot share one diff cell, so an extra row is used.
            if (null === $extraRow) {
                /* @var $extraRow \DOMElement */
                $extraRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);
            }

            if ($oldCell->getColspan() > $newCell->getColspan()) {
                // The wider old cell goes into the main row; new cells catch up in the extra row.
                $this->diffCellsAndIncrementCounters(
                    $oldCell,
                    null,
                    $cellsWithMultipleRows,
                    $diffRow,
                    $position,
                    true
                );
                $this->syncVirtualColumns($newRow, $position, $cellsWithMultipleRows, $extraRow, 'new', true);
            } else {
                // The wider new cell goes into the extra row; old cells catch up in the main row.
                $this->diffCellsAndIncrementCounters(
                    null,
                    $newCell,
                    $cellsWithMultipleRows,
                    $extraRow,
                    $position,
                    true
                );
                $this->syncVirtualColumns($oldRow, $position, $cellsWithMultipleRows, $diffRow, 'old', true);
            }
        } else {
            $diffCell = $this->diffCellsAndIncrementCounters(
                $oldCell,
                $newCell,
                $cellsWithMultipleRows,
                $diffRow,
                $position
            );
            $expandCells[] = $diffCell;
        }
    }

    // Old cells left over after all new cells were consumed.
    $oldCellCount = count($oldCells);
    while ($position->getIndexInOld() < $oldCellCount) {
        $diffCell = $this->diffCellsAndIncrementCounters(
            $oldCells[$position->getIndexInOld()],
            null,
            $cellsWithMultipleRows,
            $diffRow,
            $position
        );
        $expandCells[] = $diffCell;
    }

    // Normally diffed cells must span the extra row as well.
    if ($extraRow) {
        foreach ($expandCells as $expandCell) {
            $rowspan = $expandCell->getAttribute('rowspan') ?: 1;
            $expandCell->setAttribute('rowspan', 1 + $rowspan);
        }
    }

    // Rowspan cells tracked from earlier rows grow by one row as well.
    if ($extraRow || $forceExpansion) {
        foreach ($appliedRowSpans as $rowSpanCells) {
            /* @var $rowSpanCells \DOMElement[] */
            foreach ($rowSpanCells as $extendCell) {
                $rowspan = $extendCell->getAttribute('rowspan') ?: 1;
                $extendCell->setAttribute('rowspan', 1 + $rowspan);
            }
        }
    }

    // Drop the first tracked group unless expansion is forced.
    // NOTE(review): presumably the group whose span ends with this row - confirm.
    if (!$forceExpansion) {
        array_shift($appliedRowSpans);
        $appliedRowSpans = array_values($appliedRowSpans);
    }
    $appliedRowSpans = array_merge($appliedRowSpans, array_values($cellsWithMultipleRows));

    return array($diffRow, $extraRow);
}
546
|
|
|
|
547
|
|
|
/**
 * Creates the empty cell element for the diff document.
 *
 * With a single cell available, that cell's element is shallow-cloned. With
 * both available, the new cell's element is shallow-cloned and its rowspan and
 * colspan are set to the larger of the old and new values.
 *
 * @param TableCell|null $oldCell
 * @param TableCell|null $newCell
 *
 * @return \DOMElement
 */
protected function getNewCellNode(TableCell $oldCell = null, TableCell $newCell = null)
{
    if ($oldCell && $newCell) {
        $oldNode = $oldCell->getDomNode();
        $newNode = $newCell->getDomNode();

        /* @var $merged \DOMElement */
        $merged = $newNode->cloneNode(false);

        // A missing span attribute counts as 1.
        foreach (array('rowspan', 'colspan') as $spanAttribute) {
            $oldSpan = $oldNode->getAttribute($spanAttribute) ?: 1;
            $newSpan = $newNode->getAttribute($spanAttribute) ?: 1;

            $merged->setAttribute($spanAttribute, max($oldSpan, $newSpan));
        }

        return $this->diffDom->importNode($merged);
    }

    // If only one cell exists, use it
    $onlyCell = $newCell ?: $oldCell;

    return $this->diffDom->importNode($onlyCell->getDomNode()->cloneNode(false));
}
578
|
|
|
|
579
|
|
|
/**
 * Diffs the contents of two cells and returns the resulting diff cell.
 *
 * The inner HTML of both cells is diffed with HtmlDiff and written into a new
 * cell element. The class "del" is added when there is no new cell, "ins" when
 * there is no old cell, and "extra-row" when $usingExtraRow is set.
 *
 * @param TableCell|null $oldCell
 * @param TableCell|null $newCell
 * @param bool           $usingExtraRow
 *
 * @return \DOMElement
 */
protected function diffCells($oldCell, $newCell, $usingExtraRow = false)
{
    $diffCell = $this->getNewCellNode($oldCell, $newCell);

    // A missing cell contributes empty content.
    $oldContent = '';
    if ($oldCell) {
        $oldContent = $this->getInnerHtml($oldCell->getDomNode());
    }

    $newContent = '';
    if ($newCell) {
        $newContent = $this->getInnerHtml($newCell->getDomNode());
    }

    $cellDiff = HtmlDiff::create(
        mb_convert_encoding($oldContent, 'UTF-8', 'HTML-ENTITIES'),
        mb_convert_encoding($newContent, 'UTF-8', 'HTML-ENTITIES'),
        $this->config
    )->build();

    $this->setInnerHtml($diffCell, $cellDiff);

    // Class suffixes to append, in the order they are applied.
    $suffixes = array();
    if (null === $newCell) {
        $suffixes[] = ' del';
    }
    if (null === $oldCell) {
        $suffixes[] = ' ins';
    }
    if ($usingExtraRow) {
        $suffixes[] = ' extra-row';
    }

    foreach ($suffixes as $suffix) {
        $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').$suffix));
    }

    return $diffCell;
}
616
|
|
|
|
617
|
|
|
/**
 * Parses the old and the new text into Table structures and stores them in
 * $this->oldTable and $this->newTable (old first, then new).
 */
protected function buildTableDoms()
{
    $sources = array(
        'oldTable' => $this->oldText,
        'newTable' => $this->newText,
    );

    foreach ($sources as $property => $markup) {
        $this->{$property} = $this->parseTableStructure($markup);
    }
}
622
|
|
|
|
623
|
|
|
/**
 * Purifies the given HTML and loads it into a new DOMDocument.
 *
 * The text is converted to the configured encoding, run through HTMLPurifier,
 * and then converted to HTML entities before being handed to
 * DOMDocument::loadHTML().
 *
 * @param string $text
 *
 * @return \DOMDocument
 */
protected function createDocumentWithHtml($text)
{
    $targetEncoding = $this->config->getEncoding();

    // mb_detect_encoding() returns false when it cannot identify the encoding;
    // passing false on as the source encoding makes mb_convert_encoding() fail,
    // so fall back to the configured encoding in that case.
    $sourceEncoding = mb_detect_encoding($text) ?: $targetEncoding;

    $purified = $this->purifier->purify(
        mb_convert_encoding($text, $targetEncoding, $sourceEncoding)
    );

    $dom = new \DOMDocument();
    $dom->loadHTML(mb_convert_encoding($purified, 'HTML-ENTITIES', $targetEncoding));

    return $dom;
}
639
|
|
|
|
640
|
|
|
/**
 * Parses the given HTML into a Table object populated with its rows and cells.
 *
 * Only the first <table> element of the document is used.
 *
 * @param string $text
 *
 * @return Table
 */
protected function parseTableStructure($text)
{
    $document = $this->createDocumentWithHtml($text);

    // NOTE(review): item(0) is null when the document has no <table>; that
    // null is handed to Table as-is - confirm callers always pass table markup.
    $firstTable = $document->getElementsByTagName('table')->item(0);

    $table = new Table($firstTable);
    $this->parseTable($table);

    return $table;
}
657
|
|
|
|
658
|
|
|
/**
 * Walks the DOM below $node (or below the table's own node when $node is null)
 * and registers every <tr> found as a row of $table.
 *
 * Non-<tr> children are descended into recursively; the children of a <tr>
 * are handled by parseTableRow() and not searched for further rows.
 *
 * @param Table         $table
 * @param \DOMNode|null $node
 */
protected function parseTable(Table $table, \DOMNode $node = null)
{
    $current = $node === null ? $table->getDomNode() : $node;

    if (!$current->childNodes) {
        return;
    }

    foreach ($current->childNodes as $child) {
        if ($child->nodeName !== 'tr') {
            $this->parseTable($table, $child);

            continue;
        }

        $row = new TableRow($child);
        $table->addRow($row);

        $this->parseTableRow($row);
    }
}
683
|
|
|
|
684
|
|
|
/**
 * Adds a TableCell to the row for each direct <td> or <th> child of the row's DOM node.
 *
 * @param TableRow $row
 */
protected function parseTableRow(TableRow $row)
{
    foreach ($row->getDomNode()->childNodes as $child) {
        $tagName = $child->nodeName;

        // Anything that is not a cell element is ignored.
        if ($tagName !== 'td' && $tagName !== 'th') {
            continue;
        }

        $row->addCell(new TableCell($child));
    }
}
698
|
|
|
|
699
|
|
|
/**
 * Returns the concatenated HTML of all child nodes of the given node.
 *
 * @param \DOMNode $node
 *
 * @return string
 */
protected function getInnerHtml($node)
{
    $pieces = array();

    foreach ($node->childNodes as $child) {
        $pieces[] = $this->htmlFromNode($child);
    }

    return implode('', $pieces);
}
715
|
|
|
|
716
|
|
|
/**
 * Serializes a node, including its descendants, to HTML.
 *
 * The node is deep-imported into a temporary document, which is then saved as HTML.
 *
 * @param \DOMNode $node
 *
 * @return string
 */
protected function htmlFromNode($node)
{
    $holder = new \DOMDocument();
    $holder->appendChild($holder->importNode($node, true));

    return $holder->saveHTML();
}
729
|
|
|
|
730
|
|
|
/**
 * Appends the given HTML to the node as child nodes.
 *
 * The HTML is parsed into a temporary document (after purification, see
 * createDocumentWithHtml()); the children of its <body> are then imported
 * into the node's owner document and appended to the node.
 *
 * @param \DOMNode $node
 * @param string   $html
 */
protected function setInnerHtml($node, $html)
{
    // DOMDocument::loadHTML does not allow empty strings.
    if (strlen($html) === 0) {
        $html = '<span class="empty"></span>';
    }

    $doc = $this->createDocumentWithHtml($html);
    $root = $doc->getElementsByTagName('body')->item(0);

    // item(0) is null when the parsed document has no <body>; there is
    // nothing to append then, and dereferencing null must be avoided.
    if ($root === null) {
        return;
    }

    $ownerDocument = $node->ownerDocument;
    $fragment = $ownerDocument->createDocumentFragment();
    foreach ($root->childNodes as $child) {
        $fragment->appendChild($ownerDocument->importNode($child, true));
    }

    $node->appendChild($fragment);
}
750
|
|
|
|
751
|
|
|
/**
 * Records, for every cell of the table, its position under the cell's trimmed text content.
 *
 * Entries are added to $this->cellValues, keyed by text, each holding a list
 * of TablePosition objects (row index, cell index).
 *
 * @param Table $table
 */
protected function indexCellValues(Table $table)
{
    foreach ($table->getRows() as $rowIndex => $row) {
        foreach ($row->getCells() as $cellIndex => $cell) {
            $text = trim($cell->getDomNode()->textContent);

            // The list for a text is created on its first occurrence.
            $this->cellValues[$text][] = new TablePosition($rowIndex, $cellIndex);
        }
    }
}
768
|
|
|
|
769
|
|
|
/**
 * Emits cells from one side (old or new) into $diffRow until that side's
 * column counter is no longer behind the other side's, or the row runs out of cells.
 *
 * Every emitted cell is diffed against nothing, so it is rendered as a pure
 * deletion ('old') or insertion ('new').
 *
 * @param TableRow        $tableRow
 * @param DiffRowPosition $position
 * @param array           $cellsWithMultipleRows
 * @param \DOMNode        $diffRow
 * @param string          $diffType
 * @param bool            $usingExtraRow
 */
protected function syncVirtualColumns(
    $tableRow,
    DiffRowPosition $position,
    &$cellsWithMultipleRows,
    $diffRow,
    $diffType,
    $usingExtraRow = false
) {
    $cell = $tableRow->getCell($position->getIndex($diffType));

    while ($position->isColumnLessThanOther($diffType) && $cell) {
        if ($diffType === 'new') {
            $diffCell = $this->diffCells(null, $cell, $usingExtraRow);
        } else {
            $diffCell = $this->diffCells($cell, null, $usingExtraRow);
        }

        // Store cell in appliedRowSpans if spans multiple rows
        $rowspan = $diffCell->getAttribute('rowspan');
        if ($rowspan > 1) {
            $cellsWithMultipleRows[$rowspan][] = $diffCell;
        }

        $diffRow->appendChild($diffCell);

        // Advance this side by the width of the cell, then move to its next cell.
        $position->incrementColumn($diffType, $cell->getColspan());
        $cell = $tableRow->getCell($position->incrementIndex($diffType));
    }
}
801
|
|
|
|
802
|
|
|
/**
 * Diffs a pair of cells, appends the result to $diffRow and advances the
 * position counters for each side that supplied a cell.
 *
 * @param null|TableCell  $oldCell
 * @param null|TableCell  $newCell
 * @param array           $cellsWithMultipleRows
 * @param \DOMElement     $diffRow
 * @param DiffRowPosition $position
 * @param bool            $usingExtraRow
 *
 * @return \DOMElement
 */
protected function diffCellsAndIncrementCounters(
    $oldCell,
    $newCell,
    &$cellsWithMultipleRows,
    $diffRow,
    DiffRowPosition $position,
    $usingExtraRow = false
) {
    $diffCell = $this->diffCells($oldCell, $newCell, $usingExtraRow);

    // Store cell in appliedRowSpans if spans multiple rows
    $rowspan = $diffCell->getAttribute('rowspan');
    if ($rowspan > 1) {
        $cellsWithMultipleRows[$rowspan][] = $diffCell;
    }

    $diffRow->appendChild($diffCell);

    // Each side advances by one cell and by that cell's colspan in columns.
    if (null !== $newCell) {
        $position->incrementIndexInNew();
        $position->incrementColumnInNew($newCell->getColspan());
    }

    if (null !== $oldCell) {
        $position->incrementIndexInOld();
        $position->incrementColumnInOld($oldCell->getColspan());
    }

    return $diffCell;
}
839
|
|
|
|
840
|
|
|
/**
 * Diffs a pair of rows and appends the resulting row, followed by the extra
 * row when one was produced, to the diff table.
 *
 * @param TableRow|null $oldRow
 * @param TableRow|null $newRow
 * @param array         $appliedRowSpans
 * @param bool          $forceExpansion
 */
protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $forceExpansion = false)
{
    $producedRows = $this->diffRows($oldRow, $newRow, $appliedRowSpans, $forceExpansion);

    $mainRow = $producedRows[0];
    $extraRow = $producedRows[1];

    $this->diffTable->appendChild($mainRow);

    if (!$extraRow) {
        return;
    }

    $this->diffTable->appendChild($extraRow);
}
861
|
|
|
|
862
|
|
|
/**
 * Scores how well an old row matches a new row.
 *
 * Cells are compared position by position with similar_text(). Only
 * similarities above half the configured match threshold count; the first
 * cell is weighted by 1.5 when it is more than 95% similar. The sum is divided
 * by a total that grows with the distance between the two row indices, so
 * rows that are far apart score lower.
 *
 * @param TableRow $oldRow
 * @param TableRow $newRow
 * @param int      $oldIndex
 * @param int      $newIndex
 *
 * @return float|int
 */
protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow, $oldIndex, $newIndex)
{
    $firstCellWeight = 1.5;
    $indexDeltaWeight = 0.25 * (abs($oldIndex - $newIndex));
    $minCells = min(count($newRow->getCells()), count($oldRow->getCells()));
    $totalCount = ($minCells + $firstCellWeight + $indexDeltaWeight) * 100;

    $thresholdCount = 0;

    // A dedicated loop key keeps the $newIndex parameter intact.
    foreach ($newRow->getCells() as $cellIndex => $newCell) {
        $oldCell = $oldRow->getCell($cellIndex);
        if (!$oldCell) {
            continue;
        }

        $percentage = null;
        similar_text($oldCell->getInnerHtml(), $newCell->getInnerHtml(), $percentage);

        if (!($percentage > ($this->config->getMatchThreshold() * 0.50))) {
            continue;
        }

        $increment = $percentage;
        if ($cellIndex === 0 && $percentage > 95) {
            $increment = $increment * $firstCellWeight;
        }

        $thresholdCount += $increment;
    }

    if ($totalCount > 0) {
        return $thresholdCount / $totalCount;
    }

    return 0;
}
896
|
|
|
} |
897
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.