1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Caxy\HtmlDiff\Table; |
4
|
|
|
|
5
|
|
|
use Caxy\HtmlDiff\AbstractDiff; |
6
|
|
|
use Caxy\HtmlDiff\HtmlDiff; |
7
|
|
|
use Caxy\HtmlDiff\Operation; |
8
|
|
|
|
9
|
|
|
/**
 * @todo Add getters to TableMatch entity
 * @todo Move applicable functions to new table classes
 * @todo Find matches of rows/cells in order to handle row/cell additions/deletions
 * @todo Clean up the way new and old cells are iterated
 * @todo Make sure the diffed table keeps <tbody> or other table structure elements
 * @todo Encoding
 */
17
|
|
|
class TableDiff extends AbstractDiff |
18
|
|
|
{ |
19
|
|
|
/**
 * Parsed structure of the old table; assigned in buildTableDoms().
 *
 * @var null|Table
 */
protected $oldTable = null;

/**
 * Parsed structure of the new table; assigned in buildTableDoms().
 *
 * @var null|Table
 */
protected $newTable = null;

/**
 * Root node of the diffed table inside $diffDom; assigned in diffTableContent()
 * from DOMDocument::importNode().
 *
 * @var null|\DOMNode
 */
protected $diffTable = null;

/**
 * Document that owns every node of the diffed table.
 *
 * @var null|\DOMDocument
 */
protected $diffDom = null;

/**
 * NOTE(review): not referenced by any method of this class - confirm whether it is still needed.
 *
 * @var int
 */
protected $newRowOffsets = 0;

/**
 * NOTE(review): not referenced by any method of this class - confirm whether it is still needed.
 *
 * @var int
 */
protected $oldRowOffsets = 0;

/**
 * Trimmed cell text => list of TablePosition objects; filled by indexCellValues().
 *
 * @var array
 */
protected $cellValues = array();

/**
 * Purifier applied to diffed cell markup in setInnerHtml().
 *
 * @var \HTMLPurifier
 */
protected $purifier;
58
|
|
|
|
59
|
|
|
/**
 * Forwards all diff inputs to the parent constructor and creates the
 * HTMLPurifier instance (default configuration) used by setInnerHtml().
 *
 * @param string $oldText
 * @param string $newText
 * @param mixed  $encoding        passed through to the parent unchanged
 * @param mixed  $specialCaseTags passed through to the parent unchanged
 * @param mixed  $groupDiffs      passed through to the parent unchanged
 */
public function __construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs)
{
    parent::__construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs);

    $this->purifier = new \HTMLPurifier(\HTMLPurifier_Config::createDefault());
}
66
|
|
|
|
67
|
|
|
/**
 * Runs the whole table diff: parses both tables, indexes the cell values of
 * the new table, diffs the rows and returns the resulting markup.
 *
 * @return string the content produced by diffTableContent()
 */
public function build()
{
    // Parse $oldText / $newText into $oldTable / $newTable.
    $this->buildTableDoms();

    $this->diffDom = new \DOMDocument();

    $this->indexCellValues($this->newTable);

    // Fills $this->content with the HTML of the diffed table.
    $this->diffTableContent();

    return $this->content;
}
81
|
|
|
|
82
|
|
|
/**
 * Pads the old or new table with blank rows wherever cells at the same
 * row/cell index carry different rowspan values.
 *
 * For each new row that has an old row at the same index, the largest rowspan
 * difference in each direction is collected. When the new side spans more
 * rows, blank rows are inserted into the old table after the current row;
 * otherwise, when the old side spans more, they are inserted into the new
 * table. Nothing is inserted when the new table has no row after the current one.
 *
 * NOTE(review): the only call to this method (in build()) is commented out.
 */
protected function normalizeFormat()
{
    $oldRows = $this->oldTable->getRows();
    $newRows = $this->newTable->getRows();

    foreach ($newRows as $rowIndex => $newRow) {
        if (empty($oldRows[$rowIndex])) {
            continue;
        }
        $oldRow = $oldRows[$rowIndex];

        $newRowOffset = 0;
        $oldRowOffset = 0;

        $newCells = $newRow->getCells();
        $oldCells = $oldRow->getCells();

        foreach ($newCells as $cellIndex => $newCell) {
            if (empty($oldCells[$cellIndex])) {
                continue;
            }

            $oldNode = $oldCells[$cellIndex]->getDomNode();
            $newNode = $newCell->getDomNode();

            // A missing rowspan attribute counts as 1.
            $oldRowspan = $oldNode->getAttribute('rowspan') ?: 1;
            $newRowspan = $newNode->getAttribute('rowspan') ?: 1;

            if ($oldRowspan > $newRowspan) {
                // the new table needs placeholder rows
                $newRowOffset = max($newRowOffset, $oldRowspan - $newRowspan);
            } elseif ($newRowspan > $oldRowspan) {
                // the old table needs placeholder rows
                $oldRowOffset = max($oldRowOffset, $newRowspan - $oldRowspan);
            }
        }

        if (!isset($newRows[$rowIndex + 1])) {
            continue;
        }

        if ($oldRowOffset > 0) {
            $targetTable = $this->oldTable;
            $missingRows = $oldRowOffset;
        } elseif ($newRowOffset > 0) {
            $targetTable = $this->newTable;
            $missingRows = $newRowOffset;
        } else {
            continue;
        }

        // Every placeholder TableRow wraps the same blank <tr> element.
        $blankRow = $this->diffDom->createElement('tr');

        $insertArray = array();
        while (count($insertArray) < $missingRows) {
            $insertArray[] = new TableRow($blankRow);
        }

        $targetTable->insertRows($insertArray, $rowIndex + 1);
    }
}
145
|
|
|
|
146
|
|
|
/**
 * Builds the diffed table and stores its HTML in $this->content.
 *
 * A fresh DOM document receives a shallow copy of the new table's root node.
 * Every old row is then scored against every new row, the scores are turned
 * into row matches, and the rows are diffed according to those matches.
 */
protected function diffTableContent()
{
    $this->diffDom = new \DOMDocument();

    // Copy only the <table> element itself (attributes, no children).
    $tableShell = $this->newTable->getDomNode()->cloneNode(false);
    $this->diffTable = $this->diffDom->importNode($tableShell, false);
    $this->diffDom->appendChild($this->diffTable);

    $oldRows = $this->oldTable->getRows();
    $newRows = $this->newTable->getRows();

    // The same scores, indexed old => new and new => old.
    $oldMatchData = array();
    $newMatchData = array();

    /* @var $oldRow TableRow */
    foreach ($oldRows as $oldIndex => $oldRow) {
        // Keep an entry for every old row, even when there are no new rows.
        $oldMatchData[$oldIndex] = array();

        /* @var $newRow TableRow */
        foreach ($newRows as $newIndex => $newRow) {
            $score = $this->getMatchPercentage($oldRow, $newRow, $oldIndex, $newIndex);

            $oldMatchData[$oldIndex][$newIndex] = $score;
            $newMatchData[$newIndex][$oldIndex] = $score;
        }
    }

    $this->diffTableRowsWithMatches(
        $oldRows,
        $newRows,
        $this->getRowMatches($oldMatchData, $newMatchData)
    );

    $this->content = $this->htmlFromNode($this->diffTable);
}
182
|
|
|
|
183
|
|
|
/**
 * Converts the row matches into a list of operations and processes them in order.
 *
 * A sentinel match located just past the last row of both tables is appended
 * so that rows after the final real match are covered as well. The gap in
 * front of each match becomes a 'replace', 'insert' or 'delete' operation
 * (depending on which side has unmatched rows) and the match itself becomes
 * an 'equal' operation.
 *
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param RowMatch[] $matches
 */
protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
{
    $oldTotal = count($oldRows);
    $newTotal = count($newRows);

    $matches[] = new RowMatch($newTotal, $oldTotal, $newTotal, $oldTotal);

    // build operations
    $operations = array();
    $cursorOld = 0;
    $cursorNew = 0;

    foreach ($matches as $match) {
        $oldAligned = $cursorOld === $match->getStartInOld();
        $newAligned = $cursorNew === $match->getStartInNew();

        if (!$oldAligned || !$newAligned) {
            if ($oldAligned) {
                // only the new side has rows in front of the match
                $gapAction = 'insert';
            } elseif ($newAligned) {
                // only the old side has rows in front of the match
                $gapAction = 'delete';
            } else {
                $gapAction = 'replace';
            }

            $operations[] = new Operation(
                $gapAction,
                $cursorOld,
                $match->getStartInOld(),
                $cursorNew,
                $match->getStartInNew()
            );
        }

        $operations[] = new Operation(
            'equal',
            $match->getStartInOld(),
            $match->getEndInOld(),
            $match->getStartInNew(),
            $match->getEndInNew()
        );

        $cursorOld = $match->getEndInOld();
        $cursorNew = $match->getEndInNew();
    }

    // Rowspan bookkeeping shared by all operations (passed by reference).
    $appliedRowSpans = array();

    // process operations
    foreach ($operations as $operation) {
        switch ($operation->action) {
            case 'replace':
                $this->processReplaceOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                break;

            case 'insert':
                $this->processInsertOperation($operation, $newRows, $appliedRowSpans);
                break;

            case 'delete':
                $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans);
                break;

            case 'equal':
                $this->processEqualOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                break;
        }
    }
}
248
|
|
|
|
249
|
|
View Code Duplication |
/**
 * Appends every new row covered by the operation as an inserted row
 * (diffed against no old row).
 *
 * @param Operation  $operation       supplies startInNew / endInNew
 * @param TableRow[] $newRows
 * @param array      $appliedRowSpans rowspan bookkeeping, updated by reference
 * @param bool       $forceExpansion  forwarded to diffAndAppendRows()
 */
protected function processInsertOperation(Operation $operation, $newRows, &$appliedRowSpans, $forceExpansion = false)
{
    $rowCount = $operation->endInNew - $operation->startInNew;

    foreach (array_slice($newRows, $operation->startInNew, $rowCount) as $insertedRow) {
        $this->diffAndAppendRows(null, $insertedRow, $appliedRowSpans, $forceExpansion);
    }
}
256
|
|
|
|
257
|
|
View Code Duplication |
/**
 * Appends every old row covered by the operation as a deleted row
 * (diffed against no new row).
 *
 * @param Operation  $operation       supplies startInOld / endInOld
 * @param TableRow[] $oldRows
 * @param array      $appliedRowSpans rowspan bookkeeping, updated by reference
 * @param bool       $forceExpansion  forwarded to diffAndAppendRows()
 */
protected function processDeleteOperation(Operation $operation, $oldRows, &$appliedRowSpans, $forceExpansion = false)
{
    $rowCount = $operation->endInOld - $operation->startInOld;

    foreach (array_slice($oldRows, $operation->startInOld, $rowCount) as $deletedRow) {
        $this->diffAndAppendRows($deletedRow, null, $appliedRowSpans, $forceExpansion);
    }
}
264
|
|
|
|
265
|
|
|
/**
 * Diffs the matched old and new rows of the operation pairwise, in order.
 *
 * New rows that have no old row at the same offset within the matched range
 * are skipped.
 *
 * @param Operation  $operation       supplies the old and new row ranges
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param array      $appliedRowSpans rowspan bookkeeping, updated by reference
 */
protected function processEqualOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $oldLength = $operation->endInOld - $operation->startInOld;
    $newLength = $operation->endInNew - $operation->startInNew;

    // Re-index both slices from zero so equal offsets line up.
    $oldSlice = array_values(array_slice($oldRows, $operation->startInOld, $oldLength));
    $newSlice = array_values(array_slice($newRows, $operation->startInNew, $newLength));

    foreach ($newSlice as $offset => $newRow) {
        if (isset($oldSlice[$offset])) {
            $this->diffAndAppendRows($oldSlice[$offset], $newRow, $appliedRowSpans);
        }
    }
}
278
|
|
|
|
279
|
|
|
/**
 * Handles a replaced range: first emits the old rows as deletions, then the
 * new rows as insertions, both with forced expansion enabled.
 *
 * @param Operation  $operation
 * @param TableRow[] $oldRows
 * @param TableRow[] $newRows
 * @param array      $appliedRowSpans rowspan bookkeeping, updated by reference
 */
protected function processReplaceOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $forceExpansion = true;

    $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans, $forceExpansion);
    $this->processInsertOperation($operation, $newRows, $appliedRowSpans, $forceExpansion);
}
284
|
|
|
|
285
|
|
|
/**
 * Collects the row matches for the complete range of old and new rows.
 *
 * @param array $oldMatchData scores indexed [oldIndex][newIndex]; only its size is used here
 * @param array $newMatchData scores indexed [newIndex][oldIndex]
 *
 * @return RowMatch[] matches in the order findRowMatches() appended them
 */
protected function getRowMatches($oldMatchData, $newMatchData)
{
    $matches = array();

    // Search the whole table: [0, old row count) x [0, new row count).
    $this->findRowMatches($newMatchData, 0, count($oldMatchData), 0, count($newMatchData), $matches);

    return $matches;
}
298
|
|
|
|
299
|
|
|
/**
 * Recursively finds row matches inside the given old/new index window.
 *
 * The best match in the window is located first. The region before it is
 * searched (when both sides have rows there), then the match is appended,
 * then the region after it is searched.
 *
 * @param array      $newMatchData scores indexed [newIndex][oldIndex]
 * @param int        $startInOld   first old index of the window
 * @param int        $endInOld     old index just past the window
 * @param int        $startInNew   first new index of the window
 * @param int        $endInNew     new index just past the window
 * @param RowMatch[] $matches      receives the matches, by reference
 */
protected function findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, &$matches)
{
    $match = $this->findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew);
    if ($match === null) {
        return;
    }

    $rowsBeforeMatch = $startInOld < $match->getStartInOld()
        && $startInNew < $match->getStartInNew();
    if ($rowsBeforeMatch) {
        $this->findRowMatches(
            $newMatchData,
            $startInOld,
            $match->getStartInOld(),
            $startInNew,
            $match->getStartInNew(),
            $matches
        );
    }

    $matches[] = $match;

    $rowsAfterMatch = $match->getEndInOld() < $endInOld
        && $match->getEndInNew() < $endInNew;
    if ($rowsAfterMatch) {
        $this->findRowMatches(
            $newMatchData,
            $match->getEndInOld(),
            $endInOld,
            $match->getEndInNew(),
            $endInNew,
            $matches
        );
    }
}
325
|
|
|
|
326
|
|
|
/**
 * Finds the highest-scoring old/new row pair inside the given window.
 *
 * Only scores greater than zero qualify, and the first pair reaching the
 * highest score wins. Iteration over each list stops at the first index that
 * lies at or beyond the end of the window.
 *
 * @param array $newMatchData scores indexed [newIndex][oldIndex]
 * @param int   $startInOld
 * @param int   $endInOld
 * @param int   $startInNew
 * @param int   $endInNew
 *
 * @return RowMatch|null a match covering exactly one row on each side, or null when nothing qualifies
 */
protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew)
{
    $bestNewIndex = null;
    $bestOldIndex = null;
    $bestPercentage = 0;

    foreach ($newMatchData as $newIndex => $oldMatches) {
        if ($newIndex < $startInNew) {
            continue;
        }
        if ($newIndex >= $endInNew) {
            break;
        }

        foreach ($oldMatches as $oldIndex => $percentage) {
            if ($oldIndex < $startInOld) {
                continue;
            }
            if ($oldIndex >= $endInOld) {
                break;
            }

            if ($percentage > $bestPercentage) {
                $bestPercentage = $percentage;
                $bestNewIndex = $newIndex;
                $bestOldIndex = $oldIndex;
            }
        }
    }

    if ($bestNewIndex === null) {
        return null;
    }

    return new RowMatch($bestNewIndex, $bestOldIndex, $bestNewIndex + 1, $bestOldIndex + 1, $bestPercentage);
}
365
|
|
|
|
366
|
|
|
/**
 * Diffs one old row against one new row (either may be null) and builds the
 * resulting <tr> node(s).
 *
 * Cells are walked left to right while tracking the virtual column reached on
 * each side. When paired cells have different colspans, an additional "extra"
 * row is created: the side with the larger colspan goes on its own row and the
 * other side's cells are synchronised up to the same column. Cells that were
 * diffed as a regular pair are then stretched by one row so they cover the
 * extra row as well.
 *
 * @param TableRow|null $oldRow
 * @param TableRow|null $newRow
 * @param array         $appliedRowSpans rowspan bookkeeping carried from row to row, updated by reference
 * @param bool          $forceExpansion  when true, cells recorded in $appliedRowSpans are stretched
 *                                       by one row and the oldest bookkeeping entry is kept
 *
 * @return array two elements: the diffed row node and the extra row node (or null)
 */
protected function diffRows($oldRow, $newRow, array &$appliedRowSpans, $forceExpansion = false)
{
    // create tr dom element
    $rowToClone = $newRow ?: $oldRow;
    $diffRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);

    $oldCells = $oldRow ? $oldRow->getCells() : array();
    $newCells = $newRow ? $newRow->getCells() : array();

    $position = new DiffRowPosition();

    $extraRow = null;

    $expandCells = array();
    $cellsWithMultipleRows = array();

    // @todo: Do cell matching

    $newCellCount = count($newCells);
    while ($position->getIndexInNew() < $newCellCount) {
        if (!$position->areColumnsEqual()) {
            // One side is behind: let it catch up before pairing cells again.
            $type = $position->getLesserColumnType();
            if ($type === 'new') {
                $row = $newRow;
                $targetRow = $extraRow;
            } else {
                $row = $oldRow;
                $targetRow = $diffRow;
            }

            // The old side can only catch up while it still has a cell at the
            // current index; otherwise this loop would never advance. The
            // target row must exist because cells are appended to it.
            // (The original condition was written `!$type === 'old'`, which
            // negates $type first and therefore was always false.)
            $canSync = $type !== 'old' || isset($oldCells[$position->getIndexInOld()]);
            if ($row && $targetRow && $canSync) {
                $this->syncVirtualColumns($row, $position, $cellsWithMultipleRows, $targetRow, $type, true);

                continue;
            }
        }

        /* @var $newCell TableCell */
        $newCell = $newCells[$position->getIndexInNew()];
        /* @var $oldCell TableCell */
        $oldCell = isset($oldCells[$position->getIndexInOld()]) ? $oldCells[$position->getIndexInOld()] : null;

        if ($oldCell && $newCell->getColspan() != $oldCell->getColspan()) {
            if (null === $extraRow) {
                $extraRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);
            }

            // @todo: How do we handle cells that have both rowspan and colspan?

            if ($oldCell->getColspan() > $newCell->getColspan()) {
                // Wider old cell on the main row, new cells below it on the extra row.
                $this->diffCellsAndIncrementCounters(
                    $oldCell,
                    null,
                    $cellsWithMultipleRows,
                    $diffRow,
                    $position,
                    true
                );
                $this->syncVirtualColumns($newRow, $position, $cellsWithMultipleRows, $extraRow, 'new', true);
            } else {
                // Wider new cell on the extra row, old cells above it on the main row.
                $this->diffCellsAndIncrementCounters(
                    null,
                    $newCell,
                    $cellsWithMultipleRows,
                    $extraRow,
                    $position,
                    true
                );
                $this->syncVirtualColumns($oldRow, $position, $cellsWithMultipleRows, $diffRow, 'old', true);
            }
        } else {
            $diffCell = $this->diffCellsAndIncrementCounters(
                $oldCell,
                $newCell,
                $cellsWithMultipleRows,
                $diffRow,
                $position
            );
            $expandCells[] = $diffCell;
        }
    }

    // Old cells left over once the new cells are exhausted.
    $oldCellCount = count($oldCells);
    while ($position->getIndexInOld() < $oldCellCount) {
        $diffCell = $this->diffCellsAndIncrementCounters(
            $oldCells[$position->getIndexInOld()],
            null,
            $cellsWithMultipleRows,
            $diffRow,
            $position
        );
        $expandCells[] = $diffCell;
    }

    if ($extraRow) {
        foreach ($expandCells as $expandCell) {
            // A missing rowspan attribute reads as '' and means a span of 1;
            // doing arithmetic on '' directly gives 1 instead of 2 (and is a
            // TypeError on PHP 8).
            $currentSpan = max((int) $expandCell->getAttribute('rowspan'), 1);
            $expandCell->setAttribute('rowspan', $currentSpan + 1);
        }
    }

    if ($extraRow || $forceExpansion) {
        foreach ($appliedRowSpans as $rowSpanCells) {
            foreach ($rowSpanCells as $extendCell) {
                $currentSpan = max((int) $extendCell->getAttribute('rowspan'), 1);
                $extendCell->setAttribute('rowspan', $currentSpan + 1);
            }
        }
    }

    if (!$forceExpansion) {
        // Drop the oldest bookkeeping entry and re-index the rest.
        array_shift($appliedRowSpans);
        $appliedRowSpans = array_values($appliedRowSpans);
    }
    $appliedRowSpans = array_merge($appliedRowSpans, array_values($cellsWithMultipleRows));

    return array($diffRow, $extraRow);
}
489
|
|
|
|
490
|
|
|
/**
 * Creates the empty cell element that will hold the diff of two cells.
 *
 * With only one cell available, the result is a shallow copy of that cell's
 * node. With both available, it is a shallow copy of the new cell's node whose
 * rowspan and colspan are set to the larger value of the two cells (a missing
 * attribute counts as 1).
 *
 * @param TableCell|null $oldCell
 * @param TableCell|null $newCell
 *
 * @return \DOMElement the copy, imported into the diff document without children
 */
protected function getNewCellNode(TableCell $oldCell = null, TableCell $newCell = null)
{
    if ($oldCell && $newCell) {
        $oldNode = $oldCell->getDomNode();
        $newNode = $newCell->getDomNode();

        $clone = $newNode->cloneNode(false);

        $clone->setAttribute(
            'rowspan',
            max($oldNode->getAttribute('rowspan') ?: 1, $newNode->getAttribute('rowspan') ?: 1)
        );
        $clone->setAttribute(
            'colspan',
            max($oldNode->getAttribute('colspan') ?: 1, $newNode->getAttribute('colspan') ?: 1)
        );
    } else {
        // If only one cell exists, use it
        $onlyCell = $newCell ? $newCell : $oldCell;
        $clone = $onlyCell->getDomNode()->cloneNode(false);
    }

    return $this->diffDom->importNode($clone);
}
520
|
|
|
|
521
|
|
|
/**
 * Diffs the inner HTML of two cells and returns the resulting cell element.
 *
 * A missing cell contributes empty content. The CSS class of the result gets
 * 'del' when there is no new cell, 'ins' when there is no old cell and
 * 'extra-row' when $usingExtraRow is set.
 *
 * @param TableCell|null $oldCell
 * @param TableCell|null $newCell
 * @param bool           $usingExtraRow
 *
 * @return \DOMElement
 */
protected function diffCells($oldCell, $newCell, $usingExtraRow = false)
{
    $diffCell = $this->getNewCellNode($oldCell, $newCell);

    $oldContent = $oldCell ? $this->getInnerHtml($oldCell->getDomNode()) : '';
    $newContent = $newCell ? $this->getInnerHtml($newCell->getDomNode()) : '';

    // Entities are decoded back to UTF-8 before the content is diffed.
    // NOTE(review): the 'HTML-ENTITIES' mbstring encoding is deprecated as of PHP 8.2.
    $htmlDiff = new HtmlDiff(
        mb_convert_encoding($oldContent, 'UTF-8', 'HTML-ENTITIES'),
        mb_convert_encoding($newContent, 'UTF-8', 'HTML-ENTITIES'),
        $this->encoding,
        $this->specialCaseTags,
        $this->groupDiffs
    );
    $htmlDiff->setMatchThreshold($this->matchThreshold);

    $this->setInnerHtml($diffCell, $htmlDiff->build());

    // Class suffixes, applied one after another in this order.
    $classSuffixes = array();
    if (null === $newCell) {
        $classSuffixes[] = ' del';
    }
    if (null === $oldCell) {
        $classSuffixes[] = ' ins';
    }
    if ($usingExtraRow) {
        $classSuffixes[] = ' extra-row';
    }

    foreach ($classSuffixes as $suffix) {
        $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').$suffix));
    }

    return $diffCell;
}
554
|
|
|
|
555
|
|
|
/**
 * Parses the old and the new text into Table structures.
 *
 * Both texts are converted from UTF-8 to HTML entities before parsing.
 */
protected function buildTableDoms()
{
    $oldMarkup = mb_convert_encoding($this->oldText, 'HTML-ENTITIES', 'UTF-8');
    $this->oldTable = $this->parseTableStructure($oldMarkup);

    $newMarkup = mb_convert_encoding($this->newText, 'HTML-ENTITIES', 'UTF-8');
    $this->newTable = $this->parseTableStructure($newMarkup);
}
560
|
|
|
|
561
|
|
|
/**
 * Loads the given HTML and builds a Table from the first <table> element in it.
 *
 * @param string $text HTML markup
 *
 * @return Table the table, with its rows and cells already parsed
 */
protected function parseTableStructure($text)
{
    $document = new \DOMDocument();
    $document->loadHTML($text);

    // Only the first table of the document is used.
    $table = new Table($document->getElementsByTagName('table')->item(0));

    $this->parseTable($table);

    return $table;
}
574
|
|
|
|
575
|
|
|
/**
 * Collects all <tr> descendants below the given node as rows of the table.
 *
 * Every <tr> child becomes a TableRow (with its cells parsed); every other
 * child, such as <thead> or <tbody>, is searched recursively.
 *
 * @param Table         $table the table that receives the rows
 * @param \DOMNode|null $node  node to scan; defaults to the table's own DOM node
 */
protected function parseTable(Table $table, \DOMNode $node = null)
{
    if ($node === null) {
        $node = $table->getDomNode();
    }

    // The recursion also reaches text and comment nodes. They have no
    // children, and on some PHP versions their childNodes property may be
    // null rather than an empty list, so stop here instead of iterating it.
    if (!$node->hasChildNodes()) {
        return;
    }

    foreach ($node->childNodes as $child) {
        if ($child->nodeName === 'tr') {
            $row = new TableRow($child);
            $table->addRow($row);

            $this->parseTableRow($row);
        } else {
            $this->parseTable($table, $child);
        }
    }
}
592
|
|
|
|
593
|
|
|
/**
 * Adds a TableCell to the row for each direct <td> or <th> child of its node.
 *
 * @param TableRow $row
 */
protected function parseTableRow(TableRow $row)
{
    foreach ($row->getDomNode()->childNodes as $child) {
        $isCell = $child->nodeName === 'td' || $child->nodeName === 'th';
        if (!$isCell) {
            continue;
        }

        $row->addCell(new TableCell($child));
    }
}
604
|
|
|
|
605
|
|
|
/**
 * Returns the HTML of all children of the node, concatenated in document order.
 *
 * @param \DOMNode $node
 *
 * @return string empty when the node has no children
 */
protected function getInnerHtml($node)
{
    $html = '';

    foreach ($node->childNodes as $childNode) {
        $html .= $this->htmlFromNode($childNode);
    }

    return $html;
}
616
|
|
|
|
617
|
|
View Code Duplication |
/**
 * Serialises a node, including its descendants, to trimmed HTML.
 *
 * The node is deep-copied into a scratch document, so the original is left untouched.
 *
 * @param \DOMNode $node
 *
 * @return string
 */
protected function htmlFromNode($node)
{
    $scratch = new \DOMDocument();
    $scratch->appendChild($scratch->importNode($node, true));

    return trim($scratch->saveHTML());
}
625
|
|
|
|
626
|
|
|
/**
 * Appends the purified HTML to the node as child nodes.
 *
 * The markup is purified, parsed in a scratch document, and the children of
 * the parsed <body> are deep-copied into the node's own document. An empty
 * placeholder span is used whenever there is nothing to parse.
 *
 * @param \DOMNode $node target node; the new children are appended to it
 * @param string   $html markup to insert
 */
protected function setInnerHtml($node, $html)
{
    $placeholder = '<span class="empty"></span>';

    // DOMDocument::loadHTML does not allow empty strings.
    if (strlen((string) $html) === 0) {
        $html = $placeholder;
    }

    // Purifying can strip the markup down to nothing, so emptiness has to be
    // checked again on the string that is actually handed to loadHTML().
    $purified = $this->purifier->purify($html);
    if (strlen($purified) === 0) {
        $purified = $placeholder;
    }

    $doc = new \DOMDocument();
    $doc->loadHTML(mb_convert_encoding($purified, 'HTML-ENTITIES', 'UTF-8'));

    $root = $doc->getElementsByTagName('body')->item(0);
    if ($root === null) {
        // Nothing was parsed into a body, so there is nothing to append.
        return;
    }

    $ownerDocument = $node->ownerDocument;
    $fragment = $ownerDocument->createDocumentFragment();
    foreach ($root->childNodes as $child) {
        $fragment->appendChild($ownerDocument->importNode($child, true));
    }

    $node->appendChild($fragment);
}
643
|
|
|
|
644
|
|
|
/**
 * Indexes every cell of the table by its trimmed text content.
 *
 * $this->cellValues maps each distinct text to the list of TablePosition
 * objects (row index, cell index) at which that text occurs.
 *
 * @param Table $table
 */
protected function indexCellValues(Table $table)
{
    foreach ($table->getRows() as $rowIndex => $row) {
        foreach ($row->getCells() as $cellIndex => $cell) {
            $text = trim($cell->getDomNode()->textContent);

            // Start a new list the first time this text is seen.
            if (!isset($this->cellValues[$text])) {
                $this->cellValues[$text] = array();
            }

            $this->cellValues[$text][] = new TablePosition($rowIndex, $cellIndex);
        }
    }
}
658
|
|
|
|
659
|
|
|
/**
 * Emits one-sided cells from a row until that side's column is no longer
 * behind the other side's column, or the row has no more cells.
 *
 * Each emitted cell is diffed against nothing (as an insertion for 'new', as
 * a deletion otherwise), appended to $diffRow, and the position's column and
 * index for that side are advanced.
 *
 * @param TableRow        $tableRow              row that supplies the cells
 * @param DiffRowPosition $position              current indexes and columns; modified
 * @param array           $cellsWithMultipleRows receives cells whose rowspan exceeds 1, keyed by rowspan
 * @param \DOMNode        $diffRow               row node the cells are appended to
 * @param string          $diffType              'new' or 'old': the side being advanced
 * @param bool            $usingExtraRow         forwarded to diffCells()
 */
protected function syncVirtualColumns(
    $tableRow,
    DiffRowPosition $position,
    &$cellsWithMultipleRows,
    $diffRow,
    $diffType,
    $usingExtraRow = false
) {
    $currentCell = $tableRow->getCell($position->getIndex($diffType));

    while ($position->isColumnLessThanOther($diffType) && $currentCell) {
        if ($diffType === 'new') {
            $diffCell = $this->diffCells(null, $currentCell, $usingExtraRow);
        } else {
            $diffCell = $this->diffCells($currentCell, null, $usingExtraRow);
        }

        // Store cell in appliedRowSpans if spans multiple rows
        $rowspan = $diffCell->getAttribute('rowspan');
        if ($rowspan > 1) {
            $cellsWithMultipleRows[$rowspan][] = $diffCell;
        }

        $diffRow->appendChild($diffCell);

        $position->incrementColumn($diffType, $currentCell->getColspan());

        // incrementIndex() returns the index used to fetch the next cell.
        $nextIndex = $position->incrementIndex($diffType);
        $currentCell = $tableRow->getCell($nextIndex);
    }
}
693
|
|
|
|
694
|
|
|
/**
 * Diffs a pair of cells, appends the result to the row and advances the
 * position for every side that supplied a cell.
 *
 * @param null|TableCell  $oldCell
 * @param null|TableCell  $newCell
 * @param array           $cellsWithMultipleRows receives the diffed cell when its rowspan exceeds 1, keyed by rowspan
 * @param \DOMElement     $diffRow               row node the diffed cell is appended to
 * @param DiffRowPosition $position              indexes and columns; modified
 * @param bool            $usingExtraRow         forwarded to diffCells()
 *
 * @return \DOMElement the diffed cell
 */
protected function diffCellsAndIncrementCounters(
    $oldCell,
    $newCell,
    &$cellsWithMultipleRows,
    $diffRow,
    DiffRowPosition $position,
    $usingExtraRow = false
) {
    $diffCell = $this->diffCells($oldCell, $newCell, $usingExtraRow);

    // Store cell in appliedRowSpans if spans multiple rows
    $rowspan = $diffCell->getAttribute('rowspan');
    if ($rowspan > 1) {
        $cellsWithMultipleRows[$rowspan][] = $diffCell;
    }

    $diffRow->appendChild($diffCell);

    if (null !== $newCell) {
        $position->incrementIndexInNew();
        $position->incrementColumnInNew($newCell->getColspan());
    }

    if (null !== $oldCell) {
        $position->incrementIndexInOld();
        $position->incrementColumnInOld($oldCell->getColspan());
    }

    return $diffCell;
}
731
|
|
|
|
732
|
|
|
/**
 * Diffs two rows and appends the resulting row, followed by the extra row
 * when one was produced, to the diffed table.
 *
 * @param TableRow|null $oldRow
 * @param TableRow|null $newRow
 * @param array         $appliedRowSpans rowspan bookkeeping, updated by reference
 * @param bool          $forceExpansion  forwarded to diffRows()
 */
protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $forceExpansion = false)
{
    $diffedRows = $this->diffRows($oldRow, $newRow, $appliedRowSpans, $forceExpansion);

    $mainRow = $diffedRows[0];
    $extraRow = $diffedRows[1];

    $this->diffTable->appendChild($mainRow);

    if ($extraRow) {
        $this->diffTable->appendChild($extraRow);
    }
}
753
|
|
|
|
754
|
|
|
/**
 * Scores how well an old row matches a new row.
 *
 * Each new cell is compared with the old cell at the same index using
 * similar_text(). A cell counts only when its similarity exceeds half of the
 * match threshold, and a first cell that is more than 95% similar is weighted
 * by 1.5. The sum is divided by
 * (smaller cell count + 1.5 + 0.25 * |oldIndex - newIndex|) * 100, so rows
 * that are further apart score lower.
 *
 * @param TableRow $oldRow
 * @param TableRow $newRow
 * @param int      $oldIndex position of the old row
 * @param int      $newIndex position of the new row
 *
 * @return float|int
 */
protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow, $oldIndex, $newIndex)
{
    $firstCellWeight = 1.5;
    $indexDeltaWeight = 0.25 * (abs($oldIndex - $newIndex));

    $newCells = $newRow->getCells();
    $comparableCells = min(count($newCells), count($oldRow->getCells()));
    $totalCount = ($comparableCells + $firstCellWeight + $indexDeltaWeight) * 100;

    $thresholdCount = 0;
    foreach ($newCells as $cellIndex => $newCell) {
        $oldCell = $oldRow->getCell($cellIndex);
        if (!$oldCell) {
            continue;
        }

        $similarity = null;
        similar_text($oldCell->getInnerHtml(), $newCell->getInnerHtml(), $similarity);

        if ($similarity > ($this->matchThreshold * 0.50)) {
            $isStrongFirstCell = $cellIndex === 0 && $similarity > 95;
            $thresholdCount += $isStrongFirstCell ? $similarity * $firstCellWeight : $similarity;
        }
    }

    return ($totalCount > 0) ? ($thresholdCount / $totalCount) : 0;
}
781
|
|
|
} |
782
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.