Passed
Pull Request — master (#31)
by Josh
03:56
created

TableDiff::getMatchPercentage()   C

Complexity

Conditions 7
Paths 10

Size

Total Lines 26
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 5
Bugs 0 Features 2
Metric Value
c 5
b 0
f 2
dl 0
loc 26
ccs 0
cts 20
cp 0
rs 6.7272
cc 7
eloc 16
nc 10
nop 2
crap 56
1
<?php
2
3
namespace Caxy\HtmlDiff\Table;
4
5
use Caxy\HtmlDiff\AbstractDiff;
6
use Caxy\HtmlDiff\HtmlDiff;
7
use Caxy\HtmlDiff\Operation;
8
9
/**
10
 * @todo Add getters to TableMatch entity
11
 * @todo Move applicable functions to new table classes
12
 * @todo find matches of row/cells in order to handle row/cell additions/deletions
13
 * @todo clean up way to iterate between new and old cells
14
 * @todo Make sure diffed table keeps <tbody> or other table structure elements
15
 * @todo Encoding
16
 */
17
class TableDiff extends AbstractDiff
18
{
19
    /**
     * The old table; read by normalizeFormat(), diffTableContent() and diffTableRows().
     *
     * @var null|Table
     */
    protected $oldTable = null;

    /**
     * The new table; read by build(), normalizeFormat() and diffTableContent().
     *
     * @var null|Table
     */
    protected $newTable = null;

    /**
     * Root node of the diff output.
     *
     * diffTableContent() stores the node returned by \DOMDocument::importNode() here,
     * so this holds a DOM node rather than a Table instance.
     *
     * @var null|\DOMNode
     */
    protected $diffTable = null;

    /**
     * Document used to create and import the nodes that make up the diff output.
     *
     * @var null|\DOMDocument
     */
    protected $diffDom = null;

    /**
     * @var int
     */
    protected $newRowOffsets = 0;

    /**
     * @var int
     */
    protected $oldRowOffsets = 0;

    /**
     * NOTE(review): presumably filled by indexCellValues(); confirm against that method.
     *
     * @var array
     */
    protected $cellValues = array();

    /**
     * Purifier created in the constructor from HTMLPurifier's default configuration.
     *
     * @var \HTMLPurifier
     */
    protected $purifier;

    /**
     * Row diffing strategy. diffTableContent() switches on this value and compares it
     * against self::STRATEGY_MATCHING and self::STRATEGY_RELATIVE.
     */
    protected $strategy = self::STRATEGY_MATCHING;
60
61
    /**
     * Passes every argument through to the parent constructor unchanged, then
     * creates the HTMLPurifier instance from HTMLPurifier's default configuration.
     *
     * @param mixed $oldText
     * @param mixed $newText
     * @param mixed $encoding
     * @param mixed $specialCaseTags
     * @param mixed $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs)
    {
        parent::__construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs);

        $this->purifier = new \HTMLPurifier(\HTMLPurifier_Config::createDefault());
    }
68
69
    /**
     * Runs the complete table diff.
     *
     * Steps, in order: build the table DOMs, create the output document, pad the
     * tables so differing rowspans line up, index the new table's cell values and
     * finally diff the table content.
     *
     * @return mixed the value diffTableContent() stored in $this->content
     *
     * @throws \LogicException when either table is still null after buildTableDoms()
     */
    public function build()
    {
        $this->buildTableDoms();

        // Both properties are declared nullable; fail with a clear message instead of
        // a fatal "member function on null" inside normalizeFormat()/indexCellValues().
        if (null === $this->oldTable || null === $this->newTable) {
            throw new \LogicException('Both the old and the new table must be built before they can be diffed.');
        }

        $this->diffDom = new \DOMDocument();

        $this->normalizeFormat();

        $this->indexCellValues($this->newTable);

        $this->diffTableContent();

        return $this->content;
    }
83
84
    /**
     * Pads the old or new table with blank rows where cells at the same position
     * have different rowspans.
     *
     * For every row index present in both tables, the largest rowspan difference in
     * each direction is recorded. If the new table has a following row, blank <tr>
     * placeholders are inserted after the current row: into the old table when a new
     * cell spans more rows, otherwise into the new table when an old cell does.
     */
    protected function normalizeFormat()
    {
        $rowsInOld = $this->oldTable->getRows();
        $rowsInNew = $this->newTable->getRows();

        foreach ($rowsInNew as $rowIndex => $rowInNew) {
            // Rows without a counterpart in the old table cannot be compared.
            if (empty($rowsInOld[$rowIndex])) {
                continue;
            }

            $padNewBy = 0;
            $padOldBy = 0;

            $cellsInNew = $rowInNew->getCells();
            $cellsInOld = $rowsInOld[$rowIndex]->getCells();

            foreach ($cellsInNew as $cellIndex => $cellInNew) {
                if (empty($cellsInOld[$cellIndex])) {
                    continue;
                }

                $nodeInOld = $cellsInOld[$cellIndex]->getDomNode();
                $nodeInNew = $cellInNew->getDomNode();

                // A missing rowspan attribute counts as 1.
                $rowspanInOld = $nodeInOld->getAttribute('rowspan') ?: 1;
                $rowspanInNew = $nodeInNew->getAttribute('rowspan') ?: 1;

                if ($rowspanInOld > $rowspanInNew) {
                    // add placeholders in next row of new rows
                    $gap = $rowspanInOld - $rowspanInNew;
                    $padNewBy = $gap > $padNewBy ? $gap : $padNewBy;
                } elseif ($rowspanInNew > $rowspanInOld) {
                    $gap = $rowspanInNew - $rowspanInOld;
                    $padOldBy = $gap > $padOldBy ? $gap : $padOldBy;
                }
            }

            // Placeholders are only inserted when the new table has a following row.
            if (!isset($rowsInNew[$rowIndex + 1])) {
                continue;
            }

            // Padding the old table takes precedence over padding the new one.
            if ($padOldBy > 0) {
                $tableToPad = $this->oldTable;
                $padCount = $padOldBy;
            } elseif ($padNewBy > 0) {
                $tableToPad = $this->newTable;
                $padCount = $padNewBy;
            } else {
                continue;
            }

            // Every placeholder TableRow wraps the same blank <tr> element.
            $placeholderNode = $this->diffDom->createElement('tr');
            $placeholders = array();
            for ($n = 0; $n < $padCount; ++$n) {
                $placeholders[] = new TableRow($placeholderNode);
            }

            $tableToPad->insertRows($placeholders, $rowIndex + 1);
        }
    }
147
148
    /**
     * Builds the diffed table and stores its markup in $this->content.
     *
     * A fresh output document is created with a shallow copy of the new table's
     * node as its root. Every old/new row pair is scored with getMatchPercentage(),
     * and the rows are then diffed with the algorithm selected by $this->strategy.
     */
    protected function diffTableContent()
    {
        $this->diffDom = new \DOMDocument();

        // Shallow clone: only the table element itself, none of its children.
        $tableShell = $this->newTable->getDomNode()->cloneNode(false);
        $this->diffTable = $this->diffDom->importNode($tableShell, false);
        $this->diffDom->appendChild($this->diffTable);

        $oldRows = $this->oldTable->getRows();
        $newRows = $this->newTable->getRows();

        // The same scores, indexed [oldIndex][newIndex] and [newIndex][oldIndex].
        $oldMatchData = array();
        $newMatchData = array();

        /* @var $oldRow TableRow */
        foreach ($oldRows as $oldIndex => $oldRow) {
            $oldMatchData[$oldIndex] = array();

            /* @var $newRow TableRow */
            foreach ($newRows as $newIndex => $newRow) {
                $score = $this->getMatchPercentage($oldRow, $newRow);

                $oldMatchData[$oldIndex][$newIndex] = $score;
                $newMatchData[$newIndex][$oldIndex] = $score;
            }
        }

        switch ($this->strategy) {
            // Matching is both an explicit strategy and the fallback for unknown values.
            case self::STRATEGY_MATCHING:
            default:
                $this->diffTableRowsWithMatches(
                    $oldRows,
                    $newRows,
                    $this->getRowMatches($oldMatchData, $newMatchData)
                );
                break;

            case self::STRATEGY_RELATIVE:
                $this->diffTableRows($oldRows, $newRows, $oldMatchData);
                break;
        }

        $this->content = $this->htmlFromNode($this->diffTable);
    }
198
199
    /**
     * Diffs the rows using a list of already matched row pairs.
     *
     * The matches are converted into Operation objects: the gap in front of each
     * match becomes an insert, delete or replace operation and the match itself
     * becomes an equal operation. The operations are then processed in order.
     *
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param RowMatch[] $matches
     */
    protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
    {
        $oldTotal = count($oldRows);
        $newTotal = count($newRows);

        // Trailing match located past the last row of both tables, so rows after the
        // final real match still end up in an operation.
        $matches[] = new RowMatch($newTotal, $oldTotal, $newTotal, $oldTotal);

        // build operations
        $operations = array();
        $cursorInOld = 0;
        $cursorInNew = 0;

        foreach ($matches as $match) {
            $startInOld = $match->getStartInOld();
            $startInNew = $match->getStartInNew();
            $endInOld = $match->getEndInOld();
            $endInNew = $match->getEndInNew();

            $oldAligned = $cursorInOld === $startInOld;
            $newAligned = $cursorInNew === $startInNew;

            if (!$oldAligned || !$newAligned) {
                if ($oldAligned) {
                    // Only the new side has rows in front of the match.
                    $gapAction = 'insert';
                } elseif ($newAligned) {
                    // Only the old side has rows in front of the match.
                    $gapAction = 'delete';
                } else {
                    $gapAction = 'replace';
                }

                $operations[] = new Operation($gapAction, $cursorInOld, $startInOld, $cursorInNew, $startInNew);
            }

            $operations[] = new Operation('equal', $startInOld, $endInOld, $startInNew, $endInNew);

            $cursorInOld = $endInOld;
            $cursorInNew = $endInNew;
        }

        // Shared by reference between all process*Operation() calls below.
        $appliedRowSpans = array();

        // process operations
        foreach ($operations as $operation) {
            switch ($operation->action) {
                case 'equal':
                    $this->processEqualOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                    break;

                case 'delete':
                    $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans);
                    break;

                case 'insert':
                    $this->processInsertOperation($operation, $newRows, $appliedRowSpans);
                    break;

                case 'replace':
                    $this->processReplaceOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                    break;
            }
        }
    }
264
265 View Code Duplication
    /**
     * Appends every new row covered by the operation as a row without an old counterpart.
     *
     * @param Operation  $operation       supplies startInNew and endInNew
     * @param TableRow[] $newRows
     * @param array      $appliedRowSpans passed by reference to diffAndAppendRows()
     * @param bool       $forceExpansion  forwarded to diffAndAppendRows()
     */
    protected function processInsertOperation(
        Operation $operation,
        $newRows,
        &$appliedRowSpans,
        $forceExpansion = false
    ) {
        $length = $operation->endInNew - $operation->startInNew;

        foreach (array_slice($newRows, $operation->startInNew, $length) as $insertedRow) {
            $this->diffAndAppendRows(null, $insertedRow, $appliedRowSpans, $forceExpansion);
        }
    }
272
273 View Code Duplication
    /**
     * Appends every old row covered by the operation as a row without a new counterpart.
     *
     * @param Operation  $operation       supplies startInOld and endInOld
     * @param TableRow[] $oldRows
     * @param array      $appliedRowSpans passed by reference to diffAndAppendRows()
     * @param bool       $forceExpansion  forwarded to diffAndAppendRows()
     */
    protected function processDeleteOperation(
        Operation $operation,
        $oldRows,
        &$appliedRowSpans,
        $forceExpansion = false
    ) {
        $length = $operation->endInOld - $operation->startInOld;

        foreach (array_slice($oldRows, $operation->startInOld, $length) as $deletedRow) {
            $this->diffAndAppendRows($deletedRow, null, $appliedRowSpans, $forceExpansion);
        }
    }
280
281
    /**
     * Diffs the old and new rows covered by the operation pairwise, by position.
     *
     * New rows that have no old row at the same offset within the operation are skipped.
     *
     * @param Operation  $operation       supplies the old and new start/end indexes
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param array      $appliedRowSpans passed by reference to diffAndAppendRows()
     */
    protected function processEqualOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
    {
        $oldLength = $operation->endInOld - $operation->startInOld;
        $newLength = $operation->endInNew - $operation->startInNew;

        // Both slices are re-indexed from zero so offsets line up.
        $oldSlice = array_values(array_slice($oldRows, $operation->startInOld, $oldLength));
        $newSlice = array_values(array_slice($newRows, $operation->startInNew, $newLength));

        foreach ($newSlice as $offset => $newRow) {
            if (isset($oldSlice[$offset])) {
                $this->diffAndAppendRows($oldSlice[$offset], $newRow, $appliedRowSpans);
            }
        }
    }
294
295
    /**
     * Handles a replace as a delete of the old rows followed by an insert of the
     * new rows, both with forced expansion.
     *
     * @param Operation  $operation
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param array      $appliedRowSpans passed on by reference
     */
    protected function processReplaceOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
    {
        $forceExpansion = true;

        // Order matters: removed rows are emitted before their replacements.
        $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans, $forceExpansion);
        $this->processInsertOperation($operation, $newRows, $appliedRowSpans, $forceExpansion);
    }
300
301
    /**
     * Collects the row matches for the complete old and new index ranges.
     *
     * @param array $oldMatchData match scores indexed as [oldIndex][newIndex]; only its size is used here
     * @param array $newMatchData match scores indexed as [newIndex][oldIndex]
     *
     * @return RowMatch[] the matches in the order findRowMatches() appended them
     */
    protected function getRowMatches($oldMatchData, $newMatchData)
    {
        $collected = array();

        // Search window: [0, count) on both sides.
        $this->findRowMatches($newMatchData, 0, count($oldMatchData), 0, count($newMatchData), $collected);

        return $collected;
    }
314
315
    /**
     * Recursively collects matches inside the given old/new index window.
     *
     * The best match in the window is located first. The part of the window in
     * front of it is searched, the match is appended, and then the part behind it
     * is searched, so $matches ends up ordered by position.
     *
     * @param array      $newMatchData match scores indexed as [newIndex][oldIndex]
     * @param int        $startInOld   inclusive lower bound in the old rows
     * @param int        $endInOld     exclusive upper bound in the old rows
     * @param int        $startInNew   inclusive lower bound in the new rows
     * @param int        $endInNew     exclusive upper bound in the new rows
     * @param RowMatch[] $matches      result list, appended to by reference
     */
    protected function findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, &$matches)
    {
        $pivot = $this->findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew);

        if (null === $pivot) {
            return;
        }

        // Recurse in front of the match only when both sides still have rows there.
        $rowsBefore = $startInOld < $pivot->getStartInOld() && $startInNew < $pivot->getStartInNew();
        if ($rowsBefore) {
            $this->findRowMatches(
                $newMatchData,
                $startInOld,
                $pivot->getStartInOld(),
                $startInNew,
                $pivot->getStartInNew(),
                $matches
            );
        }

        $matches[] = $pivot;

        // Likewise for the rows behind the match.
        $rowsAfter = $pivot->getEndInOld() < $endInOld && $pivot->getEndInNew() < $endInNew;
        if ($rowsAfter) {
            $this->findRowMatches(
                $newMatchData,
                $pivot->getEndInOld(),
                $endInOld,
                $pivot->getEndInNew(),
                $endInNew,
                $matches
            );
        }
    }
341
342
    /**
     * Finds the old/new row pair with the highest score inside the given window.
     *
     * Only scores greater than zero qualify, and on equal scores the pair found
     * first wins. Iteration stops at the first index at or past the window's end.
     *
     * @param array $newMatchData match scores indexed as [newIndex][oldIndex]
     * @param int   $startInOld   inclusive lower bound in the old rows
     * @param int   $endInOld     exclusive upper bound in the old rows
     * @param int   $startInNew   inclusive lower bound in the new rows
     * @param int   $endInNew     exclusive upper bound in the new rows
     *
     * @return RowMatch|null a match covering exactly one row on each side, or null when nothing qualified
     */
    protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew)
    {
        $found = false;
        $topScore = 0;
        $topOldIndex = null;
        $topNewIndex = null;

        foreach ($newMatchData as $newIndex => $scoresByOldIndex) {
            if ($newIndex < $startInNew) {
                continue;
            }

            if ($newIndex >= $endInNew) {
                break;
            }

            foreach ($scoresByOldIndex as $oldIndex => $score) {
                if ($oldIndex < $startInOld) {
                    continue;
                }

                if ($oldIndex >= $endInOld) {
                    break;
                }

                if ($score > $topScore) {
                    $found = true;
                    $topScore = $score;
                    $topOldIndex = $oldIndex;
                    $topNewIndex = $newIndex;
                }
            }
        }

        if (!$found) {
            return null;
        }

        return new RowMatch($topNewIndex, $topOldIndex, $topNewIndex + 1, $topOldIndex + 1, $topScore);
    }
381
382
    /**
     * Diffs the rows by walking the new rows in order next to a cursor into the old rows.
     *
     * For each new row one of the following happens:
     * - no old row at the cursor: the new row is appended without an old counterpart;
     * - the old row at the cursor scores higher against a later new row, the new table
     *   has more rows and the surplus is not used up: the new row is appended without
     *   an old counterpart and the cursor stays where it is;
     * - the next old row scores higher and above the match threshold: the old row at
     *   the cursor is appended without a new counterpart and the next old row is used;
     * - otherwise the old row at the cursor is diffed against the new row.
     *
     * When the old table has more rows than the new one, the old rows from index
     * count($newRows) onwards are appended afterwards without a new counterpart.
     *
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param array      $oldMatchData match scores indexed as [oldIndex][newIndex]
     */
    protected function diffTableRows($oldRows, $newRows, $oldMatchData)
    {
        $appliedRowSpans = array();
        $currentIndexInOld = 0;
        $oldCount = count($oldRows);
        $newCount = count($newRows);
        $difference = max($oldCount, $newCount) - min($oldCount, $newCount);

        foreach ($newRows as $newIndex => $row) {
            $oldRow = $this->oldTable->getRow($currentIndexInOld);

            if (!$oldRow) {
                // Nothing in the old table at the cursor.
                $this->diffAndAppendRows(null, $row, $appliedRowSpans);

                continue;
            }

            $matchPercentage = $oldMatchData[$currentIndexInOld][$newIndex];

            // does the old row match better?
            // No early exit: the last later index with a higher score is the one kept.
            $otherMatchBetter = false;
            foreach ($oldMatchData[$currentIndexInOld] as $index => $percentage) {
                if ($index > $newIndex && $percentage > $matchPercentage) {
                    $otherMatchBetter = $index;
                }
            }

            if (false !== $otherMatchBetter && $newCount > $oldCount && $difference > 0) {
                // insert row as new
                $this->diffAndAppendRows(null, $row, $appliedRowSpans);
                $difference--;

                continue;
            }

            $nextOldIndex = array_key_exists($currentIndexInOld + 1, $oldRows) ? $currentIndexInOld + 1 : null;

            $replacement = false;

            if ($nextOldIndex !== null &&
                $oldMatchData[$nextOldIndex][$newIndex] > $matchPercentage &&
                $oldMatchData[$nextOldIndex][$newIndex] > $this->matchThreshold
            ) {
                // Following row in old is better match, use that.
                $this->diffAndAppendRows($oldRows[$currentIndexInOld], null, $appliedRowSpans, true);

                // The score is not needed again in this iteration, so it is not reloaded.
                $currentIndexInOld++;
                $replacement = true;
            }

            $this->diffAndAppendRows($oldRows[$currentIndexInOld], $row, $appliedRowSpans, $replacement);
            $currentIndexInOld++;
        }

        if ($oldCount > $newCount) {
            foreach (array_slice($oldRows, $newCount) as $row) {
                $this->diffAndAppendRows($row, null, $appliedRowSpans);
            }
        }
    }
446
447
    /**
     * Diffs one old row against one new row, cell by cell.
     *
     * Either row may be null, but not both: the output row is a shallow copy of the
     * new row's node, or of the old row's node when there is no new row. Cells whose
     * colspans differ are split over the output row and an additional "extra" row;
     * when an extra row exists, the remaining cells get their rowspan increased by
     * one so they cover both rows.
     *
     * @param TableRow|null $oldRow
     * @param TableRow|null $newRow
     * @param array         $appliedRowSpans lists of cells still spanning rows; updated by reference
     * @param bool          $forceExpansion  when true, cells in $appliedRowSpans are extended by one row
     *                                       and the first entry of $appliedRowSpans is kept
     *
     * @return array two elements: the diffed row node, and the extra row node or null when none was needed
     */
    protected function diffRows($oldRow, $newRow, array &$appliedRowSpans, $forceExpansion = false)
    {
        // create tr dom element
        $rowToClone = $newRow ?: $oldRow;
        $diffRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);

        $oldCells = $oldRow ? $oldRow->getCells() : array();
        $newCells = $newRow ? $newRow->getCells() : array();

        $position = new DiffRowPosition();

        $extraRow = null;

        $expandCells = array();
        $cellsWithMultipleRows = array();

        // @todo: Do cell matching

        $newCellCount = count($newCells);
        while ($position->getIndexInNew() < $newCellCount) {
            if (!$position->areColumnsEqual()) {
                $type = $position->getLesserColumnType();
                if ($type === 'new') {
                    $row = $newRow;
                    $targetRow = $extraRow;
                } else {
                    $row = $oldRow;
                    $targetRow = $diffRow;
                }

                // NOTE(review): this condition used to read
                // `!$type === 'old' || isset($oldCells[...])`. Because `!` binds tighter
                // than `===`, the first operand compared a boolean with a string and was
                // always false, so only the isset() check ever took effect. The dead
                // operand is removed here without changing behaviour; if `$type !== 'old'`
                // was intended, that needs to be confirmed and tested separately.
                if ($row && isset($oldCells[$position->getIndexInOld()])) {
                    $this->syncVirtualColumns($row, $position, $cellsWithMultipleRows, $targetRow, $type, true);

                    continue;
                }
            }

            /* @var $newCell TableCell */
            $newCell = $newCells[$position->getIndexInNew()];
            /* @var $oldCell TableCell */
            $oldCell = isset($oldCells[$position->getIndexInOld()]) ? $oldCells[$position->getIndexInOld()] : null;

            if ($oldCell && $newCell->getColspan() != $oldCell->getColspan()) {
                // The extra row is created on first use only.
                if (null === $extraRow) {
                    $extraRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);
                }

                // @todo: How do we handle cells that have both rowspan and colspan?

                if ($oldCell->getColspan() > $newCell->getColspan()) {
                    $this->diffCellsAndIncrementCounters(
                        $oldCell,
                        null,
                        $cellsWithMultipleRows,
                        $diffRow,
                        $position,
                        true
                    );
                    $this->syncVirtualColumns($newRow, $position, $cellsWithMultipleRows, $extraRow, 'new', true);
                } else {
                    $this->diffCellsAndIncrementCounters(
                        null,
                        $newCell,
                        $cellsWithMultipleRows,
                        $extraRow,
                        $position,
                        true
                    );
                    $this->syncVirtualColumns($oldRow, $position, $cellsWithMultipleRows, $diffRow, 'old', true);
                }
            } else {
                $diffCell = $this->diffCellsAndIncrementCounters(
                    $oldCell,
                    $newCell,
                    $cellsWithMultipleRows,
                    $diffRow,
                    $position
                );
                $expandCells[] = $diffCell;
            }
        }

        // Old cells left over once the new cells are used up.
        $oldCellCount = count($oldCells);
        while ($position->getIndexInOld() < $oldCellCount) {
            $diffCell = $this->diffCellsAndIncrementCounters(
                $oldCells[$position->getIndexInOld()],
                null,
                $cellsWithMultipleRows,
                $diffRow,
                $position
            );
            $expandCells[] = $diffCell;
        }

        if ($extraRow) {
            foreach ($expandCells as $expandCell) {
                // getAttribute() yields '' for a missing attribute. Treat that as the
                // implicit rowspan of 1 rather than adding 1 to an empty string, which
                // throws a TypeError on PHP 8 and produced rowspan="1" before that.
                $currentRowspan = (int) $expandCell->getAttribute('rowspan') ?: 1;
                $expandCell->setAttribute('rowspan', $currentRowspan + 1);
            }
        }

        if ($extraRow || $forceExpansion) {
            foreach ($appliedRowSpans as $rowSpanCells) {
                foreach ($rowSpanCells as $extendCell) {
                    $currentRowspan = (int) $extendCell->getAttribute('rowspan') ?: 1;
                    $extendCell->setAttribute('rowspan', $currentRowspan + 1);
                }
            }
        }

        if (!$forceExpansion) {
            // Drop the first entry and re-index the rest.
            array_shift($appliedRowSpans);
            $appliedRowSpans = array_values($appliedRowSpans);
        }
        $appliedRowSpans = array_merge($appliedRowSpans, array_values($cellsWithMultipleRows));

        return array($diffRow, $extraRow);
    }
570
571
    /**
     * Creates the empty output cell for a pair of old/new cells.
     *
     * With only one cell given, the result is a shallow copy of that cell's node.
     * With both given, the result is a shallow copy of the new cell's node whose
     * rowspan and colspan are set to the larger value of the two cells (a missing
     * attribute counts as 1). The copy is imported into the diff document.
     *
     * @param TableCell|null $oldCell
     * @param TableCell|null $newCell
     *
     * @return \DOMElement
     *
     * @throws \InvalidArgumentException when both cells are null
     */
    protected function getNewCellNode(TableCell $oldCell = null, TableCell $newCell = null)
    {
        // With neither cell there is nothing to copy; report that explicitly instead
        // of failing with a method call on null below.
        if (null === $oldCell && null === $newCell) {
            throw new \InvalidArgumentException('At least one of the old and the new cell must be provided.');
        }

        // If only one cell exists, use it
        if (!$oldCell || !$newCell) {
            $onlyCell = $newCell ?: $oldCell;
            $clone = $onlyCell->getDomNode()->cloneNode(false);
        } else {
            $oldNode = $oldCell->getDomNode();
            $newNode = $newCell->getDomNode();

            $clone = $newNode->cloneNode(false);

            $oldRowspan = $oldNode->getAttribute('rowspan') ?: 1;
            $oldColspan = $oldNode->getAttribute('colspan') ?: 1;
            $newRowspan = $newNode->getAttribute('rowspan') ?: 1;
            $newColspan = $newNode->getAttribute('colspan') ?: 1;

            // The output cell has to be able to hold both versions.
            $clone->setAttribute('rowspan', max($oldRowspan, $newRowspan));
            $clone->setAttribute('colspan', max($oldColspan, $newColspan));
        }

        return $this->diffDom->importNode($clone);
    }
601
602
    /**
     * Diffs the inner HTML of two cells and returns the resulting cell element.
     *
     * A missing cell is diffed as empty content. The returned element gets the
     * class "del" when there is no new cell, "ins" when there is no old cell, and
     * "extra-row" when $usingExtraRow is set; existing classes are kept.
     *
     * @param TableCell|null $oldCell
     * @param TableCell|null $newCell
     * @param bool           $usingExtraRow
     *
     * @return \DOMElement
     */
    protected function diffCells($oldCell, $newCell, $usingExtraRow = false)
    {
        $diffCell = $this->getNewCellNode($oldCell, $newCell);

        $oldContent = '';
        if ($oldCell) {
            $oldContent = $this->getInnerHtml($oldCell->getDomNode());
        }

        $newContent = '';
        if ($newCell) {
            $newContent = $this->getInnerHtml($newCell->getDomNode());
        }

        $htmlDiff = new HtmlDiff(
            mb_convert_encoding($oldContent, 'UTF-8', 'HTML-ENTITIES'),
            mb_convert_encoding($newContent, 'UTF-8', 'HTML-ENTITIES'),
            $this->encoding,
            $this->specialCaseTags,
            $this->groupDiffs
        );
        $htmlDiff->setMatchThreshold($this->matchThreshold);

        $this->setInnerHtml($diffCell, $htmlDiff->build());

        // Collect the marker classes in the order they have to be appended.
        $markers = array();
        if (null === $newCell) {
            $markers[] = 'del';
        }
        if (null === $oldCell) {
            $markers[] = 'ins';
        }
        if ($usingExtraRow) {
            $markers[] = 'extra-row';
        }

        foreach ($markers as $marker) {
            $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').' '.$marker));
        }

        return $diffCell;
    }
635
636
    /**
     * Parses the old and new texts into Table structures.
     *
     * Each text is converted from UTF-8 to HTML entities before being parsed.
     */
    protected function buildTableDoms()
    {
        $oldHtml = mb_convert_encoding($this->oldText, 'HTML-ENTITIES', 'UTF-8');
        $this->oldTable = $this->parseTableStructure($oldHtml);

        $newHtml = mb_convert_encoding($this->newText, 'HTML-ENTITIES', 'UTF-8');
        $this->newTable = $this->parseTableStructure($newHtml);
    }
641
642
    /**
     * Loads HTML into a DOM document and builds a Table from its first <table> element.
     *
     * @param string $text HTML markup to parse
     *
     * @return Table The table with its rows and cells parsed
     */
    protected function parseTableStructure($text)
    {
        $document = new \DOMDocument();
        $document->loadHTML($text);

        // Only the first table in the markup is considered.
        $table = new Table($document->getElementsByTagName('table')->item(0));
        $this->parseTable($table);

        return $table;
    }
655
656
    /**
     * Walks a DOM subtree and registers every <tr> it finds as a row of the table.
     *
     * Children that are not rows are descended into recursively; the children of a
     * row itself are handled by parseTableRow() and not traversed here.
     *
     * @param Table         $table Table that collects the rows
     * @param \DOMNode|null $node  Subtree root; defaults to the table's own DOM node
     */
    protected function parseTable(Table $table, \DOMNode $node = null)
    {
        $root = (null === $node) ? $table->getDomNode() : $node;

        foreach ($root->childNodes as $childNode) {
            if ($childNode->nodeName !== 'tr') {
                $this->parseTable($table, $childNode);

                continue;
            }

            $tableRow = new TableRow($childNode);
            $table->addRow($tableRow);

            $this->parseTableRow($tableRow);
        }
    }
673
674
    /**
     * Adds every direct <td> or <th> child of the row's DOM node to the row as a cell.
     *
     * @param TableRow $row
     */
    protected function parseTableRow(TableRow $row)
    {
        foreach ($row->getDomNode()->childNodes as $childNode) {
            $tagName = $childNode->nodeName;

            if ($tagName === 'td' || $tagName === 'th') {
                $row->addCell(new TableCell($childNode));
            }
        }
    }
685
686
    /**
     * Serializes the children of a node into a single HTML string.
     *
     * @param \DOMNode $node
     *
     * @return string Concatenated HTML of all child nodes, in document order
     */
    protected function getInnerHtml($node)
    {
        $parts = array();

        foreach ($node->childNodes as $childNode) {
            $parts[] = $this->htmlFromNode($childNode);
        }

        return implode('', $parts);
    }
697
698 View Code Duplication
    /**
     * Serializes a single node, including its subtree, to HTML.
     *
     * The node is deep-imported into a fresh document so that only this node ends up
     * in the output.
     *
     * @param \DOMNode $node
     *
     * @return string Trimmed HTML of the node
     */
    protected function htmlFromNode($node)
    {
        $scratchDocument = new \DOMDocument();
        $scratchDocument->appendChild($scratchDocument->importNode($node, true));

        $html = $scratchDocument->saveHTML();

        return trim($html);
    }
706
707
    /**
     * Appends purified HTML to a node as child nodes.
     *
     * The markup is run through the purifier, parsed in a scratch document, and the
     * children of the parsed <body> are imported into the node's owner document and
     * appended to the node. If the markup is empty, either as given or after
     * purification, an empty placeholder span is inserted instead.
     *
     * @param \DOMNode $node Node that receives the content
     * @param string   $html Markup to insert
     */
    protected function setInnerHtml($node, $html)
    {
        $placeholder = '<span class="empty"></span>';

        // DOMDocument::loadHTML does not allow empty strings.
        $html = (string) $html;
        if (strlen($html) === 0) {
            $html = $placeholder;
        }

        // The purifier may strip the markup down to nothing, so the emptiness check
        // has to be repeated on what is actually handed to loadHTML().
        $purified = $this->purifier->purify($html);
        if (strlen($purified) === 0) {
            $purified = $placeholder;
        }

        $doc = new \DOMDocument();
        $doc->loadHTML(mb_convert_encoding($purified, 'HTML-ENTITIES', 'UTF-8'));

        $root = $doc->getElementsByTagName('body')->item(0);
        if (null === $root) {
            // Nothing parseable was produced; there is nothing to append.
            return;
        }

        $fragment = $node->ownerDocument->createDocumentFragment();
        foreach ($root->childNodes as $child) {
            $fragment->appendChild($node->ownerDocument->importNode($child, true));
        }

        $node->appendChild($fragment);
    }
724
725
    /**
     * Records the position of every cell of the table, keyed by its trimmed text content.
     *
     * Positions are appended to $this->cellValues, so cells sharing the same text end
     * up in the same list.
     *
     * @param Table $table
     */
    protected function indexCellValues(Table $table)
    {
        foreach ($table->getRows() as $rowIndex => $row) {
            foreach ($row->getCells() as $cellIndex => $cell) {
                $text = trim($cell->getDomNode()->textContent);
                $position = new TablePosition($rowIndex, $cellIndex);

                if (isset($this->cellValues[$text])) {
                    $this->cellValues[$text][] = $position;
                } else {
                    $this->cellValues[$text] = array($position);
                }
            }
        }
    }
739
740
    /**
     * Emits one-sided cells for one side of the diff while that side's column lags
     * behind the other side's column.
     *
     * Each emitted cell is diffed against nothing (as an insertion when $diffType is
     * 'new', otherwise as a deletion), appended to the diff row, and the position is
     * advanced by the cell's colspan and by one cell index.
     *
     * @param mixed           $tableRow              Row providing getCell(); presumably a TableRow
     * @param DiffRowPosition $position              Current indexes/columns of both sides
     * @param array           $cellsWithMultipleRows Diff cells with rowspan > 1, grouped by rowspan (by reference)
     * @param \DOMElement     $diffRow               Row element the diff cells are appended to
     * @param string          $diffType              'new' to advance the new side, anything else for the old side
     * @param bool            $usingExtraRow         Passed through to diffCells()
     */
    protected function syncVirtualColumns(
        $tableRow,
        DiffRowPosition $position,
        &$cellsWithMultipleRows,
        $diffRow,
        $diffType,
        $usingExtraRow = false
    ) {
        $cell = $tableRow->getCell($position->getIndex($diffType));

        while ($position->isColumnLessThanOther($diffType) && $cell) {
            if ($diffType === 'new') {
                $diffCell = $this->diffCells(null, $cell, $usingExtraRow);
            } else {
                $diffCell = $this->diffCells($cell, null, $usingExtraRow);
            }

            // Store cell in appliedRowSpans if spans multiple rows
            $rowspan = $diffCell->getAttribute('rowspan');
            if ($rowspan > 1) {
                $cellsWithMultipleRows[$rowspan][] = $diffCell;
            }

            $diffRow->appendChild($diffCell);

            $position->incrementColumn($diffType, $cell->getColspan());
            $cell = $tableRow->getCell($position->incrementIndex($diffType));
        }
    }
774
775
    /**
     * Diffs a pair of cells, appends the result to the diff row and advances the
     * position for every side that contributed a cell.
     *
     * @param null|TableCell  $oldCell
     * @param null|TableCell  $newCell
     * @param array           $cellsWithMultipleRows Diff cells with rowspan > 1, grouped by rowspan (by reference)
     * @param \DOMElement     $diffRow
     * @param DiffRowPosition $position
     * @param bool            $usingExtraRow
     *
     * @return \DOMElement The diffed cell that was appended
     */
    protected function diffCellsAndIncrementCounters(
        $oldCell,
        $newCell,
        &$cellsWithMultipleRows,
        $diffRow,
        DiffRowPosition $position,
        $usingExtraRow = false
    ) {
        $diffCell = $this->diffCells($oldCell, $newCell, $usingExtraRow);

        // Store cell in appliedRowSpans if spans multiple rows
        $rowspan = $diffCell->getAttribute('rowspan');
        if ($rowspan > 1) {
            $cellsWithMultipleRows[$rowspan][] = $diffCell;
        }

        $diffRow->appendChild($diffCell);

        if (null !== $newCell) {
            $position->incrementIndexInNew();
            $position->incrementColumnInNew($newCell->getColspan());
        }

        if (null !== $oldCell) {
            $position->incrementIndexInOld();
            $position->incrementColumnInOld($oldCell->getColspan());
        }

        return $diffCell;
    }
812
813
    /**
     * Diffs two rows and appends the resulting row, plus the extra row if diffRows()
     * produced one, to the diff table.
     *
     * @param mixed $oldRow          Passed through to diffRows()
     * @param mixed $newRow          Passed through to diffRows()
     * @param mixed $appliedRowSpans Passed through to diffRows() (by reference)
     * @param bool  $forceExpansion  Passed through to diffRows()
     */
    protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $forceExpansion = false)
    {
        $result = $this->diffRows($oldRow, $newRow, $appliedRowSpans, $forceExpansion);

        $rowDom = $result[0];
        $extraRow = $result[1];

        $this->diffTable->appendChild($rowDom);

        if (!$extraRow) {
            return;
        }

        $this->diffTable->appendChild($extraRow);
    }
834
835
    /**
     * Scores how well two rows match by comparing their cells index by index.
     *
     * For every new cell that has an old cell at the same index, the similar_text()
     * percentage of their inner HTML is added to the score, but only when it exceeds
     * half of the match threshold. A first cell (index 0) scoring above 95 counts
     * three times. The score is divided by
     * (smaller cell count + first-cell weight) * 100.
     *
     * @param TableRow $oldRow
     * @param TableRow $newRow
     *
     * @return float|int Score divided by the maximum total
     */
    protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow)
    {
        $firstCellWeight = 3;

        $newCells = $newRow->getCells();
        $smallestCellCount = min(count($newCells), count($oldRow->getCells()));
        $maxScore = ($smallestCellCount + $firstCellWeight) * 100;

        $minimumSimilarity = $this->matchThreshold * 0.50;
        $score = 0;

        foreach ($newCells as $index => $candidate) {
            $counterpart = $oldRow->getCell($index);
            if (!$counterpart) {
                continue;
            }

            $similarity = null;
            similar_text($counterpart->getInnerHtml(), $candidate->getInnerHtml(), $similarity);

            // Weak matches do not contribute to the score at all.
            if (!($similarity > $minimumSimilarity)) {
                continue;
            }

            $isStrongFirstCell = ($index === 0 && $similarity > 95);
            $score += $isStrongFirstCell ? $similarity * $firstCellWeight : $similarity;
        }

        return ($maxScore > 0) ? ($score / $maxScore) : 0;
    }
861
}
862