Passed
Pull Request — master (#31)
by Josh
04:13
created

TableDiff::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 3
Bugs 0 Features 1
Metric Value
c 3
b 0
f 1
dl 0
loc 7
ccs 0
cts 5
cp 0
rs 9.4285
cc 1
eloc 4
nc 1
nop 5
crap 2
1
<?php
2
3
namespace Caxy\HtmlDiff\Table;
4
5
use Caxy\HtmlDiff\AbstractDiff;
6
use Caxy\HtmlDiff\HtmlDiff;
7
use Caxy\HtmlDiff\Operation;
8
9
/**
10
 * @todo Add getters to TableMatch entity
11
 * @todo Move applicable functions to new table classes
12
 * @todo find matches of row/cells in order to handle row/cell additions/deletions
13
 * @todo clean up way to iterate between new and old cells
14
 * @todo Make sure diffed table keeps <tbody> or other table structure elements
15
 * @todo Encoding
16
 */
17
class TableDiff extends AbstractDiff
18
{
19
    /**
20
     * @var null|Table
21
     */
22
    protected $oldTable = null;
23
24
    /**
25
     * @var null|Table
26
     */
27
    protected $newTable = null;
28
29
    /**
30
     * @var null|\DOMNode
31
     */
32
    protected $diffTable = null;
33
34
    /**
35
     * @var null|\DOMDocument
36
     */
37
    protected $diffDom = null;
38
39
    /**
40
     * @var int
41
     */
42
    protected $newRowOffsets = 0;
43
44
    /**
45
     * @var int
46
     */
47
    protected $oldRowOffsets = 0;
48
49
    /**
50
     * @var array
51
     */
52
    protected $cellValues = array();
53
54
    /**
55
     * @var \HTMLPurifier
56
     */
57
    protected $purifier;
58
59
    /**
     * Sets up the table differ.
     *
     * All five arguments are forwarded unchanged to the parent constructor;
     * afterwards an HTMLPurifier instance using the library's default
     * configuration is stored in $this->purifier.
     */
    public function __construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs)
    {
        parent::__construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs);

        $this->purifier = new \HTMLPurifier(\HTMLPurifier_Config::createDefault());
    }
66
67
    /**
     * Builds the diff between the old and the new table markup.
     *
     * Steps: parse both inputs into table structures, create a fresh output
     * document, index the cell values of the new table, then diff the table
     * content.
     *
     * @return mixed the value of $this->content as set by diffTableContent()
     *               (presumably the diffed table HTML as a string)
     */
    public function build()
    {
        $this->buildTableDoms();

        $this->diffDom = new \DOMDocument();

        $this->indexCellValues($this->newTable);

        $this->diffTableContent();

        return $this->content;
    }
81
82
    /**
     * Pads the old or new table with blank rows where rowspans differ.
     *
     * For every row index present in both tables, cells at the same index are
     * compared by their rowspan attribute. The largest rowspan difference in
     * each direction is recorded, and when a following new row exists that
     * many blank <tr> rows are inserted after the current index: into the old
     * table when the new rowspans are larger, otherwise into the new table
     * when the old rowspans are larger.
     *
     * Requires $this->diffDom to be initialised, since the blank rows are
     * created from that document.
     */
    protected function normalizeFormat()
    {
        $oldRows = $this->oldTable->getRows();
        $newRows = $this->newTable->getRows();

        foreach ($newRows as $rowIndex => $newRow) {
            $oldRow = isset($oldRows[$rowIndex]) ? $oldRows[$rowIndex] : null;

            if (!$oldRow) {
                continue;
            }

            $newRowOffset = 0;
            $oldRowOffset = 0;

            $newCells = $newRow->getCells();
            $oldCells = $oldRow->getCells();

            foreach ($newCells as $cellIndex => $newCell) {
                $oldCell = isset($oldCells[$cellIndex]) ? $oldCells[$cellIndex] : null;

                if (!$oldCell) {
                    continue;
                }

                // getAttribute() yields a string ('' when absent); compare as integers, defaulting to 1.
                $oldRowspan = (int) $oldCell->getDomNode()->getAttribute('rowspan') ?: 1;
                $newRowspan = (int) $newCell->getDomNode()->getAttribute('rowspan') ?: 1;

                // Keep the largest difference seen in each direction; a negative difference never wins.
                $newRowOffset = max($newRowOffset, $oldRowspan - $newRowspan);
                $oldRowOffset = max($oldRowOffset, $newRowspan - $oldRowspan);
            }

            if (!isset($newRows[$rowIndex + 1])) {
                continue;
            }

            if ($oldRowOffset > 0) {
                $targetTable = $this->oldTable;
                $blankCount = $oldRowOffset;
            } elseif ($newRowOffset > 0) {
                $targetTable = $this->newTable;
                $blankCount = $newRowOffset;
            } else {
                continue;
            }

            // Each placeholder row gets its own <tr> element rather than sharing a single DOM node.
            $blankRows = array();
            for ($i = 0; $i < $blankCount; $i++) {
                $blankRows[] = new TableRow($this->diffDom->createElement('tr'));
            }

            $targetTable->insertRows($blankRows, $rowIndex + 1);
        }
    }
145
146
    /**
     * Diffs the parsed old and new tables and stores the result in $this->content.
     *
     * A childless copy of the new table's element becomes the root of a fresh
     * output document. Every old row is then scored against every new row via
     * getMatchPercentage(), the scores are turned into row matches, and the
     * rows are diffed according to those matches.
     */
    protected function diffTableContent()
    {
        $this->diffDom = new \DOMDocument();

        $emptyTableClone = $this->newTable->getDomNode()->cloneNode(false);
        $this->diffTable = $this->diffDom->importNode($emptyTableClone, false);
        $this->diffDom->appendChild($this->diffTable);

        $oldRows = $this->oldTable->getRows();
        $newRows = $this->newTable->getRows();

        // The same scores, indexed both ways: [oldIndex][newIndex] and [newIndex][oldIndex].
        $oldMatchData = array();
        $newMatchData = array();

        /* @var $oldRow TableRow */
        foreach ($oldRows as $oldIndex => $oldRow) {
            $scoresForOldRow = array();

            /* @var $newRow TableRow */
            foreach ($newRows as $newIndex => $newRow) {
                $percentage = $this->getMatchPercentage($oldRow, $newRow, $oldIndex, $newIndex);

                $scoresForOldRow[$newIndex] = $percentage;
                // The nested array is created on first write for each new index.
                $newMatchData[$newIndex][$oldIndex] = $percentage;
            }

            $oldMatchData[$oldIndex] = $scoresForOldRow;
        }

        $matches = $this->getRowMatches($oldMatchData, $newMatchData);
        $this->diffTableRowsWithMatches($oldRows, $newRows, $matches);

        $this->content = $this->htmlFromNode($this->diffTable);
    }
182
183
    /**
     * Turns the row matches into a list of operations and applies them in order.
     *
     * Any gap between the current position and the next match becomes an
     * insert, delete or replace operation; each match itself becomes an
     * equal operation.
     *
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param RowMatch[] $matches
     */
    protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
    {
        $oldTotal = count($oldRows);
        $newTotal = count($newRows);

        // Sentinel: a zero-length match at the end of both row lists, so trailing rows are handled too.
        $matches[] = new RowMatch($newTotal, $oldTotal, $newTotal, $oldTotal);

        $operations = array();
        $cursorInOld = 0;
        $cursorInNew = 0;

        // build operations
        foreach ($matches as $match) {
            $oldAligned = $cursorInOld === $match->getStartInOld();
            $newAligned = $cursorInNew === $match->getStartInNew();

            if (!$oldAligned || !$newAligned) {
                if ($oldAligned) {
                    $gapAction = 'insert';
                } elseif ($newAligned) {
                    $gapAction = 'delete';
                } else {
                    $gapAction = 'replace';
                }

                $operations[] = new Operation(
                    $gapAction,
                    $cursorInOld,
                    $match->getStartInOld(),
                    $cursorInNew,
                    $match->getStartInNew()
                );
            }

            $operations[] = new Operation(
                'equal',
                $match->getStartInOld(),
                $match->getEndInOld(),
                $match->getStartInNew(),
                $match->getEndInNew()
            );

            $cursorInOld = $match->getEndInOld();
            $cursorInNew = $match->getEndInNew();
        }

        $appliedRowSpans = array();

        // process operations
        foreach ($operations as $operation) {
            $action = $operation->action;

            if ($action === 'equal') {
                $this->processEqualOperation($operation, $oldRows, $newRows, $appliedRowSpans);
            } elseif ($action === 'delete') {
                $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans);
            } elseif ($action === 'insert') {
                $this->processInsertOperation($operation, $newRows, $appliedRowSpans);
            } elseif ($action === 'replace') {
                $this->processReplaceOperation($operation, $oldRows, $newRows, $appliedRowSpans);
            }
        }
    }
248
249 View Code Duplication
    /**
     * Appends every new row covered by the operation as an inserted row.
     *
     * Covers the new rows from startInNew up to, but not including, endInNew;
     * each is diffed against a null old row.
     *
     * @param Operation  $operation
     * @param TableRow[] $newRows
     * @param array      $appliedRowSpans passed by reference through to diffAndAppendRows()
     * @param bool       $forceExpansion
     */
    protected function processInsertOperation(
        Operation $operation,
        $newRows,
        &$appliedRowSpans,
        $forceExpansion = false
    ) {
        $length = $operation->endInNew - $operation->startInNew;

        foreach (array_slice($newRows, $operation->startInNew, $length) as $insertedRow) {
            $this->diffAndAppendRows(null, $insertedRow, $appliedRowSpans, $forceExpansion);
        }
    }
256
257 View Code Duplication
    /**
     * Appends every old row covered by the operation as a deleted row.
     *
     * Covers the old rows from startInOld up to, but not including, endInOld;
     * each is diffed against a null new row.
     *
     * @param Operation  $operation
     * @param TableRow[] $oldRows
     * @param array      $appliedRowSpans passed by reference through to diffAndAppendRows()
     * @param bool       $forceExpansion
     */
    protected function processDeleteOperation(
        Operation $operation,
        $oldRows,
        &$appliedRowSpans,
        $forceExpansion = false
    ) {
        $length = $operation->endInOld - $operation->startInOld;

        foreach (array_slice($oldRows, $operation->startInOld, $length) as $deletedRow) {
            $this->diffAndAppendRows($deletedRow, null, $appliedRowSpans, $forceExpansion);
        }
    }
264
265
    /**
     * Diffs the matched old and new rows of an operation pairwise, by position.
     *
     * New rows that have no old row at the same position within the slice are
     * skipped.
     *
     * @param Operation  $operation
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param array      $appliedRowSpans passed by reference through to diffAndAppendRows()
     */
    protected function processEqualOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
    {
        $oldLength = $operation->endInOld - $operation->startInOld;
        $newLength = $operation->endInNew - $operation->startInNew;

        $pairedOldRows = array_values(array_slice($oldRows, $operation->startInOld, $oldLength));
        $pairedNewRows = array_values(array_slice($newRows, $operation->startInNew, $newLength));

        foreach ($pairedNewRows as $offset => $newRow) {
            if (isset($pairedOldRows[$offset])) {
                $this->diffAndAppendRows($pairedOldRows[$offset], $newRow, $appliedRowSpans);
            }
        }
    }
278
279
    /**
     * Handles a replace operation as a delete of the old rows followed by an
     * insert of the new rows, both with forced expansion enabled.
     *
     * @param Operation  $operation
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param array      $appliedRowSpans passed by reference to both sub-operations
     */
    protected function processReplaceOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
    {
        $forceExpansion = true;

        $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans, $forceExpansion);
        $this->processInsertOperation($operation, $newRows, $appliedRowSpans, $forceExpansion);
    }
284
285
    /**
     * Collects the row matches over the full range of old and new rows.
     *
     * @param array $oldMatchData match percentages indexed [oldIndex][newIndex]; only its size is used here
     * @param array $newMatchData match percentages indexed [newIndex][oldIndex]
     *
     * @return RowMatch[] the matches, in the order findRowMatches() appended them
     */
    protected function getRowMatches($oldMatchData, $newMatchData)
    {
        $matches = array();

        $this->findRowMatches($newMatchData, 0, count($oldMatchData), 0, count($newMatchData), $matches);

        return $matches;
    }
298
299
    /**
     * Recursively finds row matches inside the given old/new index window.
     *
     * The best match in the window is located first; the search then recurses
     * into the region before it and the region after it, so that matches are
     * appended to $matches in positional order.
     *
     * @param array      $newMatchData match percentages indexed [newIndex][oldIndex]
     * @param int        $startInOld   inclusive
     * @param int        $endInOld     exclusive
     * @param int        $startInNew   inclusive
     * @param int        $endInNew     exclusive
     * @param RowMatch[] $matches      output list, passed by reference
     */
    protected function findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, &$matches)
    {
        $match = $this->findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew);
        if ($match === null) {
            return;
        }

        // Only search before the match when rows remain on both sides.
        $rowsRemainBefore = $startInOld < $match->getStartInOld() && $startInNew < $match->getStartInNew();
        if ($rowsRemainBefore) {
            $this->findRowMatches(
                $newMatchData,
                $startInOld,
                $match->getStartInOld(),
                $startInNew,
                $match->getStartInNew(),
                $matches
            );
        }

        $matches[] = $match;

        // Likewise for the rows after the match.
        $rowsRemainAfter = $match->getEndInOld() < $endInOld && $match->getEndInNew() < $endInNew;
        if ($rowsRemainAfter) {
            $this->findRowMatches(
                $newMatchData,
                $match->getEndInOld(),
                $endInOld,
                $match->getEndInNew(),
                $endInNew,
                $matches
            );
        }
    }
325
326
    /**
     * Finds the old/new row pair with the highest match percentage in a window.
     *
     * Only percentages strictly greater than zero qualify, and on ties the
     * first pair encountered wins. Iteration stops at the first index at or
     * past the window end, so both levels of $newMatchData are expected to be
     * in ascending index order.
     *
     * @param array $newMatchData match percentages indexed [newIndex][oldIndex]
     * @param int   $startInOld   inclusive
     * @param int   $endInOld     exclusive
     * @param int   $startInNew   inclusive
     * @param int   $endInNew     exclusive
     *
     * @return RowMatch|null a single-row match, or null when nothing in the window scores above zero
     */
    protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew)
    {
        $bestNewIndex = null;
        $bestOldIndex = null;
        $bestPercentage = 0;

        foreach ($newMatchData as $newIndex => $oldMatches) {
            if ($newIndex < $startInNew) {
                continue;
            }

            if ($newIndex >= $endInNew) {
                break;
            }

            foreach ($oldMatches as $oldIndex => $percentage) {
                if ($oldIndex < $startInOld) {
                    continue;
                }

                if ($oldIndex >= $endInOld) {
                    break;
                }

                if ($percentage > $bestPercentage) {
                    $bestPercentage = $percentage;
                    $bestNewIndex = $newIndex;
                    $bestOldIndex = $oldIndex;
                }
            }
        }

        if ($bestNewIndex === null) {
            return null;
        }

        return new RowMatch(
            $bestNewIndex,
            $bestOldIndex,
            $bestNewIndex + 1,
            $bestOldIndex + 1,
            $bestPercentage
        );
    }
365
366
    /**
     * Diffs a pair of rows cell by cell and builds the resulting row node(s).
     *
     * Either row may be null (a pure insertion or deletion), but not both:
     * the <tr> node of the new row, or of the old row when there is no new
     * row, is cloned without children as the container for the diffed cells.
     * When an old and a new cell differ in colspan, a second "extra" row is
     * created and the two cells are placed on separate rows; cells that were
     * diffed normally then have their rowspan increased by one so they cover
     * both rows.
     *
     * @param TableRow|null $oldRow
     * @param TableRow|null $newRow
     * @param array         $appliedRowSpans lists of cells carried over from earlier rows; updated by reference
     * @param bool          $forceExpansion  when true, carried-over cells are always extended by one row and the
     *                                       first carried-over entry is not dropped
     *
     * @return array two elements: [0] the diffed row node, [1] the extra row node, or null if none was needed
     */
    protected function diffRows($oldRow, $newRow, array &$appliedRowSpans, $forceExpansion = false)
    {
        // create tr dom element
        $rowToClone = $newRow ?: $oldRow;
        $diffRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);

        $oldCells = $oldRow ? $oldRow->getCells() : array();
        $newCells = $newRow ? $newRow->getCells() : array();

        $position = new DiffRowPosition();

        $extraRow = null;

        $expandCells = array();
        $cellsWithMultipleRows = array();

        // @todo: Do cell matching

        $newCellCount = count($newCells);
        while ($position->getIndexInNew() < $newCellCount) {
            if (!$position->areColumnsEqual()) {
                $type = $position->getLesserColumnType();
                if ($type === 'new') {
                    $row = $newRow;
                    $targetRow = $extraRow;
                } else {
                    $row = $oldRow;
                    $targetRow = $diffRow;
                }

                // NOTE(review): this condition used to read
                //     $row && (!$type === 'old' || isset($oldCells[...]))
                // Because `!` binds tighter than `===`, `!$type === 'old'` was always false, so only
                // the isset() check ever mattered. `$type !== 'old'` was probably intended, but that
                // would also sync when $targetRow ($extraRow) may still be null. The effective
                // behaviour is kept here until the intended semantics are confirmed.
                if ($row && isset($oldCells[$position->getIndexInOld()])) {
                    $this->syncVirtualColumns($row, $position, $cellsWithMultipleRows, $targetRow, $type, true);

                    continue;
                }
            }

            /* @var $newCell TableCell */
            $newCell = $newCells[$position->getIndexInNew()];
            /* @var $oldCell TableCell */
            $oldCell = isset($oldCells[$position->getIndexInOld()]) ? $oldCells[$position->getIndexInOld()] : null;

            if ($oldCell && $newCell->getColspan() != $oldCell->getColspan()) {
                if (null === $extraRow) {
                    $extraRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);
                }

                // @todo: How do we handle cells that have both rowspan and colspan?

                if ($oldCell->getColspan() > $newCell->getColspan()) {
                    $this->diffCellsAndIncrementCounters(
                        $oldCell,
                        null,
                        $cellsWithMultipleRows,
                        $diffRow,
                        $position,
                        true
                    );
                    $this->syncVirtualColumns($newRow, $position, $cellsWithMultipleRows, $extraRow, 'new', true);
                } else {
                    $this->diffCellsAndIncrementCounters(
                        null,
                        $newCell,
                        $cellsWithMultipleRows,
                        $extraRow,
                        $position,
                        true
                    );
                    $this->syncVirtualColumns($oldRow, $position, $cellsWithMultipleRows, $diffRow, 'old', true);
                }
            } else {
                $diffCell = $this->diffCellsAndIncrementCounters(
                    $oldCell,
                    $newCell,
                    $cellsWithMultipleRows,
                    $diffRow,
                    $position
                );
                $expandCells[] = $diffCell;
            }
        }

        // Remaining old cells with no new counterpart.
        $oldCellCount = count($oldCells);
        while ($position->getIndexInOld() < $oldCellCount) {
            $diffCell = $this->diffCellsAndIncrementCounters(
                $oldCells[$position->getIndexInOld()],
                null,
                $cellsWithMultipleRows,
                $diffRow,
                $position
            );
            $expandCells[] = $diffCell;
        }

        if ($extraRow) {
            foreach ($expandCells as $expandCell) {
                // getAttribute() returns '' when rowspan is absent; treat that as the default of 1
                // so the cell really does span the extra row (and '' + 1 is never evaluated).
                $rowspan = (int) $expandCell->getAttribute('rowspan') ?: 1;
                $expandCell->setAttribute('rowspan', $rowspan + 1);
            }
        }

        if ($extraRow || $forceExpansion) {
            foreach ($appliedRowSpans as $rowSpanCells) {
                foreach ($rowSpanCells as $extendCell) {
                    $rowspan = (int) $extendCell->getAttribute('rowspan') ?: 1;
                    $extendCell->setAttribute('rowspan', $rowspan + 1);
                }
            }
        }

        if (!$forceExpansion) {
            array_shift($appliedRowSpans);
            $appliedRowSpans = array_values($appliedRowSpans);
        }
        $appliedRowSpans = array_merge($appliedRowSpans, array_values($cellsWithMultipleRows));

        return array($diffRow, $extraRow);
    }
489
490
    /**
     * Creates the empty cell node that will hold the diff of two cells.
     *
     * With only one cell given, its node is cloned without children. With
     * both given, the new cell's node is cloned without children and its
     * rowspan/colspan are set to the larger of the two cells' values (a
     * missing attribute counts as 1). The clone is imported into the diff
     * document.
     *
     * @param TableCell|null $oldCell
     * @param TableCell|null $newCell
     *
     * @return \DOMElement
     *
     * @throws \InvalidArgumentException when both cells are null
     */
    protected function getNewCellNode(TableCell $oldCell = null, TableCell $newCell = null)
    {
        if (null === $oldCell && null === $newCell) {
            // Previously this fell through to a method call on null.
            throw new \InvalidArgumentException('At least one of $oldCell or $newCell must be provided.');
        }

        // If only one cell exists, use it
        if (null === $oldCell || null === $newCell) {
            $onlyCell = $newCell ?: $oldCell;
            $clone = $onlyCell->getDomNode()->cloneNode(false);
        } else {
            $oldNode = $oldCell->getDomNode();
            $newNode = $newCell->getDomNode();

            $clone = $newNode->cloneNode(false);

            // getAttribute() returns strings; compare the spans as integers.
            $oldRowspan = (int) $oldNode->getAttribute('rowspan') ?: 1;
            $oldColspan = (int) $oldNode->getAttribute('colspan') ?: 1;
            $newRowspan = (int) $newNode->getAttribute('rowspan') ?: 1;
            $newColspan = (int) $newNode->getAttribute('colspan') ?: 1;

            $clone->setAttribute('rowspan', max($oldRowspan, $newRowspan));
            $clone->setAttribute('colspan', max($oldColspan, $newColspan));
        }

        return $this->diffDom->importNode($clone);
    }
520
521
    /**
     * Diffs the inner HTML of two cells and returns the resulting cell node.
     *
     * A missing cell contributes empty content. The result cell gets the
     * class "del" when there is no new cell, "ins" when there is no old cell,
     * and "extra-row" when $usingExtraRow is set; each is appended to any
     * class value already present.
     *
     * @param TableCell|null $oldCell
     * @param TableCell|null $newCell
     * @param bool           $usingExtraRow
     *
     * @return \DOMElement
     */
    protected function diffCells($oldCell, $newCell, $usingExtraRow = false)
    {
        $diffCell = $this->getNewCellNode($oldCell, $newCell);

        $oldContent = '';
        if ($oldCell) {
            $oldContent = $this->getInnerHtml($oldCell->getDomNode());
        }

        $newContent = '';
        if ($newCell) {
            $newContent = $this->getInnerHtml($newCell->getDomNode());
        }

        $htmlDiff = new HtmlDiff(
            mb_convert_encoding($oldContent, 'UTF-8', 'HTML-ENTITIES'),
            mb_convert_encoding($newContent, 'UTF-8', 'HTML-ENTITIES'),
            $this->encoding,
            $this->specialCaseTags,
            $this->groupDiffs
        );
        $htmlDiff->setMatchThreshold($this->matchThreshold);

        $this->setInnerHtml($diffCell, $htmlDiff->build());

        // Collect the marker classes in the order they are applied.
        $markerClasses = array();
        if (null === $newCell) {
            $markerClasses[] = 'del';
        }
        if (null === $oldCell) {
            $markerClasses[] = 'ins';
        }
        if ($usingExtraRow) {
            $markerClasses[] = 'extra-row';
        }

        foreach ($markerClasses as $markerClass) {
            $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').' '.$markerClass));
        }

        return $diffCell;
    }
554
555
    /**
     * Parses the old and new text into table structures.
     *
     * Each text is converted from UTF-8 to HTML entities before parsing; the
     * results are stored in $this->oldTable and $this->newTable.
     */
    protected function buildTableDoms()
    {
        $oldMarkup = mb_convert_encoding($this->oldText, 'HTML-ENTITIES', 'UTF-8');
        $this->oldTable = $this->parseTableStructure($oldMarkup);

        $newMarkup = mb_convert_encoding($this->newText, 'HTML-ENTITIES', 'UTF-8');
        $this->newTable = $this->parseTableStructure($newMarkup);
    }
560
561
    /**
     * Parses HTML text and builds a Table from its first <table> element.
     *
     * libxml parse errors (raised for HTML5 elements or malformed markup, for
     * example) are collected internally and discarded instead of being
     * emitted as PHP warnings; the previous libxml error-handling setting is
     * restored afterwards.
     *
     * @param string $text HTML markup expected to contain a <table>
     *
     * @return Table wraps the first <table> element, or null when the markup contains none
     */
    protected function parseTableStructure($text)
    {
        $dom = new \DOMDocument();

        $previousSetting = libxml_use_internal_errors(true);
        $dom->loadHTML($text);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        $tableNode = $dom->getElementsByTagName('table')->item(0);

        $table = new Table($tableNode);

        $this->parseTable($table);

        return $table;
    }
574
575
    /**
     * Walks the DOM below a node and registers every <tr> it finds on the table.
     *
     * Each <tr> child becomes a TableRow that is added to $table and has its
     * cells parsed. Any other child that has children of its own is searched
     * recursively, so rows nested inside wrapper elements are picked up too.
     *
     * @param Table         $table table that collects the rows
     * @param null|\DOMNode $node  node to search; defaults to the table's own DOM node
     */
    protected function parseTable(Table $table, \DOMNode $node = null)
    {
        if ($node === null) {
            $node = $table->getDomNode();
        }

        // A table that was built without a DOM node has nothing to parse.
        if ($node === null) {
            return;
        }

        foreach ($node->childNodes as $child) {
            if ($child->nodeName === 'tr') {
                $row = new TableRow($child);
                $table->addRow($row);

                $this->parseTableRow($row);
            } elseif ($child->hasChildNodes()) {
                // Leaf nodes (text, comments, empty elements) cannot contain
                // rows, so only descend into nodes that have children.
                $this->parseTable($table, $child);
            }
        }
    }
592
593
    /**
     * Adds a TableCell to the row for each direct <td> or <th> child of the
     * row's DOM node. Other child nodes are ignored.
     *
     * @param TableRow $row row whose DOM node is scanned for cells
     */
    protected function parseTableRow(TableRow $row)
    {
        foreach ($row->getDomNode()->childNodes as $candidate) {
            $isCell = $candidate->nodeName === 'td' || $candidate->nodeName === 'th';
            if (!$isCell) {
                continue;
            }

            $row->addCell(new TableCell($candidate));
        }
    }
604
605
    /**
     * Serializes the children of a node and concatenates the results.
     *
     * @param \DOMNode $node node whose child nodes are serialized
     *
     * @return string markup of all child nodes, in document order
     */
    protected function getInnerHtml($node)
    {
        $pieces = array();

        foreach ($node->childNodes as $childNode) {
            $pieces[] = $this->htmlFromNode($childNode);
        }

        return implode('', $pieces);
    }
616
617 View Code Duplication
    /**
     * Serializes a single node (including its descendants) to HTML.
     *
     * The node is deep-copied into a fresh document so that saveHTML() emits
     * only that node; the result is trimmed.
     *
     * NOTE(review): static analysis reports a duplicate of this method
     * elsewhere in the project - consider sharing one implementation.
     *
     * @param \DOMNode $node node to serialize
     *
     * @return string trimmed HTML of the node
     */
    protected function htmlFromNode($node)
    {
        $document = new \DOMDocument();
        $document->appendChild($document->importNode($node, true));

        $html = $document->saveHTML();

        return trim($html);
    }
625
626
    /**
     * Purifies an HTML snippet and appends the resulting nodes to $node.
     *
     * The snippet is purified, parsed in a scratch document, and the children
     * of the parsed <body> are imported into $node's owner document and
     * appended. Existing children of $node are left in place.
     *
     * @param \DOMNode $node node that receives the parsed content
     * @param string   $html HTML snippet; an empty snippet is replaced by an
     *                       empty placeholder <span>
     */
    protected function setInnerHtml($node, $html)
    {
        $placeholder = '<span class="empty"></span>';

        // DOMDocument::loadHTML does not allow empty strings.
        if (strlen($html) === 0) {
            $html = $placeholder;
        }

        $purified = $this->purifier->purify($html);

        // The purifier may strip everything it was given, so the emptiness
        // check has to be repeated on what is actually passed to loadHTML().
        if (strlen($purified) === 0) {
            $purified = $placeholder;
        }

        $doc = new \DOMDocument();
        $doc->loadHTML(mb_convert_encoding($purified, 'HTML-ENTITIES', 'UTF-8'));

        // No <body> means the parser produced no content to copy over.
        $root = $doc->getElementsByTagName('body')->item(0);
        if ($root === null) {
            return;
        }

        $fragment = $node->ownerDocument->createDocumentFragment();
        foreach ($root->childNodes as $child) {
            $fragment->appendChild($node->ownerDocument->importNode($child, true));
        }

        // Skip the append for an empty fragment; there is nothing to add.
        if ($fragment->hasChildNodes()) {
            $node->appendChild($fragment);
        }
    }
643
644
    /**
     * Indexes every cell of the table by its trimmed text content.
     *
     * For each cell a TablePosition (row index, cell index) is appended to
     * $this->cellValues under the cell's trimmed text, so cells with the same
     * text share one list of positions.
     *
     * @param Table $table table whose cells are indexed
     */
    protected function indexCellValues(Table $table)
    {
        foreach ($table->getRows() as $rowNumber => $tableRow) {
            foreach ($tableRow->getCells() as $cellNumber => $tableCell) {
                $text = trim($tableCell->getDomNode()->textContent);
                $position = new TablePosition($rowNumber, $cellNumber);

                if (isset($this->cellValues[$text])) {
                    $this->cellValues[$text][] = $position;
                } else {
                    $this->cellValues[$text] = array($position);
                }
            }
        }
    }
658
659
    /**
     * Emits one-sided diff cells for a row while that side's column position
     * is behind the other side's and the row still has cells.
     *
     * Each emitted cell is appended to $diffRow, and the position's column and
     * index for $diffType are advanced past it.
     *
     * @param TableRow        $tableRow              row the cells are read from (type presumed from its getCell() use)
     * @param DiffRowPosition $position              column/index bookkeeping for both sides
     * @param array           $cellsWithMultipleRows diff cells with a rowspan above 1, grouped by rowspan
     * @param \DOMElement     $diffRow               row element that receives the diff cells
     * @param string          $diffType              'new' diffs the cell as the new side; any other value as the old side
     * @param bool            $usingExtraRow         forwarded to diffCells()
     */
    protected function syncVirtualColumns(
        $tableRow,
        DiffRowPosition $position,
        &$cellsWithMultipleRows,
        $diffRow,
        $diffType,
        $usingExtraRow = false
    ) {
        $currentCell = $tableRow->getCell($position->getIndex($diffType));

        while ($position->isColumnLessThanOther($diffType) && $currentCell) {
            if ($diffType === 'new') {
                $diffCell = $this->diffCells(null, $currentCell, $usingExtraRow);
            } else {
                $diffCell = $this->diffCells($currentCell, null, $usingExtraRow);
            }

            // Remember cells that span multiple rows, keyed by their rowspan.
            $rowspan = $diffCell->getAttribute('rowspan');
            if ($rowspan > 1) {
                $cellsWithMultipleRows[$rowspan][] = $diffCell;
            }

            $diffRow->appendChild($diffCell);

            // Advance the column by the cell's colspan, then move to the next cell.
            $position->incrementColumn($diffType, $currentCell->getColspan());
            $nextIndex = $position->incrementIndex($diffType);
            $currentCell = $tableRow->getCell($nextIndex);
        }
    }
693
694
    /**
     * Diffs a pair of cells, appends the result to the diff row and advances
     * the position counters of every side that supplied a cell.
     *
     * @param null|TableCell  $oldCell               cell from the old row, or null when absent
     * @param null|TableCell  $newCell               cell from the new row, or null when absent
     * @param array           $cellsWithMultipleRows diff cells with a rowspan above 1, grouped by rowspan
     * @param \DOMElement     $diffRow               row element that receives the diff cell
     * @param DiffRowPosition $position              column/index bookkeeping for both sides
     * @param bool            $usingExtraRow         forwarded to diffCells()
     *
     * @return \DOMElement the diff cell that was appended
     */
    protected function diffCellsAndIncrementCounters(
        $oldCell,
        $newCell,
        &$cellsWithMultipleRows,
        $diffRow,
        DiffRowPosition $position,
        $usingExtraRow = false
    ) {
        $diffCell = $this->diffCells($oldCell, $newCell, $usingExtraRow);

        // Remember cells that span multiple rows, keyed by their rowspan.
        $rowspan = $diffCell->getAttribute('rowspan');
        if ($rowspan > 1) {
            $cellsWithMultipleRows[$rowspan][] = $diffCell;
        }

        $diffRow->appendChild($diffCell);

        if (null !== $newCell) {
            $position->incrementIndexInNew();
            $position->incrementColumnInNew($newCell->getColspan());
        }

        if (null !== $oldCell) {
            $position->incrementIndexInOld();
            $position->incrementColumnInOld($oldCell->getColspan());
        }

        return $diffCell;
    }
731
732
    /**
     * Diffs two rows and appends the resulting row(s) to the diff table.
     *
     * diffRows() returns the diffed row plus an optional extra row; the extra
     * row is appended only when it is truthy.
     *
     * NOTE(review): static analysis reports that appendChild() does not exist
     * on Table, the documented type of $diffTable. Presumably the property
     * holds a DOM element at this point - confirm and fix its @var if so.
     *
     * @param      $oldRow          row from the old table, passed to diffRows()
     * @param      $newRow          row from the new table, passed to diffRows()
     * @param      $appliedRowSpans passed by reference to diffRows()
     * @param bool $forceExpansion  forwarded to diffRows()
     */
    protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $forceExpansion = false)
    {
        list($rowDom, $extraRow) = $this->diffRows($oldRow, $newRow, $appliedRowSpans, $forceExpansion);

        $this->diffTable->appendChild($rowDom);

        if (!$extraRow) {
            return;
        }

        $this->diffTable->appendChild($extraRow);
    }
753
754
    /**
     * Scores how well two rows match, as a ratio of achieved to maximum score.
     *
     * Cells are compared position by position with similar_text(). A cell pair
     * contributes its similarity percentage only when that exceeds half of
     * $this->matchThreshold; a first cell that is more than 95% similar is
     * weighted by 1.5. The maximum score grows with the distance between the
     * two row indexes, so rows that are further apart score lower.
     *
     * @param TableRow $oldRow   row from the old table
     * @param TableRow $newRow   row from the new table
     * @param int      $oldIndex index of $oldRow in its table
     * @param int      $newIndex index of $newRow in its table
     *
     * @return float|int achieved score divided by the maximum score
     */
    protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow, $oldIndex, $newIndex)
    {
        $firstCellWeight = 1.5;
        $indexDeltaWeight = 0.25 * (abs($oldIndex - $newIndex));

        $sharedCellCount = min(count($newRow->getCells()), count($oldRow->getCells()));
        $maxScore = ($sharedCellCount + $firstCellWeight + $indexDeltaWeight) * 100;

        $score = 0;
        $cellThreshold = $this->matchThreshold * 0.50;

        foreach ($newRow->getCells() as $cellIndex => $newCell) {
            $oldCell = $oldRow->getCell($cellIndex);
            if (!$oldCell) {
                // No counterpart at this position in the old row.
                continue;
            }

            $similarity = null;
            similar_text($oldCell->getInnerHtml(), $newCell->getInnerHtml(), $similarity);

            if ($similarity <= $cellThreshold) {
                continue;
            }

            if ($cellIndex === 0 && $similarity > 95) {
                $score += $similarity * $firstCellWeight;
            } else {
                $score += $similarity;
            }
        }

        if ($maxScore > 0) {
            return $score / $maxScore;
        }

        return 0;
    }
781
}
782