Cancelled
Push — master ( 5c013f...948458 )
by Josh
322:57 queued 322:57
created

TableDiff::diffTableRowsWithMatches()   C

Complexity

Conditions 14
Paths 54

Size

Total Lines 72
Code Lines 47

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 210

Importance

Changes 5
Bugs 0 Features 2
Metric Value
c 5
b 0
f 2
dl 0
loc 72
ccs 0
cts 55
cp 0
rs 5.4961
cc 14
eloc 47
nc 54
nop 3
crap 210

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive, commented section of the body into its own well-named method.

1
<?php
2
3
namespace Caxy\HtmlDiff\Table;
4
5
use Caxy\HtmlDiff\AbstractDiff;
6
use Caxy\HtmlDiff\HtmlDiff;
7
use Caxy\HtmlDiff\Operation;
8
9
/**
10
 * Class TableDiff
11
 * @package Caxy\HtmlDiff\Table
12
 */
13
class TableDiff extends AbstractDiff
14
{
15
    /**
16
     * @var null|Table
17
     */
18
    protected $oldTable = null;
19
20
    /**
21
     * @var null|Table
22
     */
23
    protected $newTable = null;
24
25
    /**
26
     * @var null|\DOMElement
27
     */
28
    protected $diffTable = null;
29
30
    /**
31
     * @var null|\DOMDocument
32
     */
33
    protected $diffDom = null;
34
35
    /**
36
     * @var int
37
     */
38
    protected $newRowOffsets = 0;
39
40
    /**
41
     * @var int
42
     */
43
    protected $oldRowOffsets = 0;
44
45
    /**
46
     * @var array
47
     */
48
    protected $cellValues = array();
49
50
    /**
51
     * @var \HTMLPurifier
52
     */
53
    protected $purifier;
54
55
    /**
     * Creates a table diff for two HTML fragments.
     *
     * All arguments are forwarded unchanged to the parent constructor; afterwards
     * an HTMLPurifier instance is created from HTMLPurifier's default configuration.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param array|null $specialCaseTags
     * @param bool|null  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        parent::__construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs);

        $purifierConfig = \HTMLPurifier_Config::createDefault();
        $this->purifier = new \HTMLPurifier($purifierConfig);
    }
70
71
    /**
     * Runs the complete table diff and returns the resulting markup.
     *
     * Steps, in order: parse both input texts into Table structures, create the
     * output document, index the cell values of the new table, and diff the
     * table content (which fills $this->content).
     *
     * @return string The content produced by diffTableContent().
     */
    public function build()
    {
        // Populates $this->oldTable and $this->newTable.
        $this->buildTableDoms();

        $this->diffDom = new \DOMDocument();

        $this->indexCellValues($this->newTable);
        $this->diffTableContent();

        return $this->content;
    }
86
87
    /**
     * Builds the diffed table and stores its HTML in $this->content.
     *
     * A clone of the new table becomes the root of a fresh output document. Every
     * old row is scored against every new row with getMatchPercentage(); the
     * scores are kept in two lookup tables (old index => new index => score and
     * the reverse), which are used to pair rows before the rows are diffed.
     */
    protected function diffTableContent()
    {
        $this->diffDom = new \DOMDocument();
        $this->diffTable = $this->newTable->cloneNode($this->diffDom);
        $this->diffDom->appendChild($this->diffTable);

        $oldRows = $this->oldTable->getRows();
        $newRows = $this->newTable->getRows();

        $scoresByOldIndex = array();
        $scoresByNewIndex = array();

        /* @var $oldRow TableRow */
        foreach ($oldRows as $oldIndex => $oldRow) {
            $scoresByOldIndex[$oldIndex] = array();

            /* @var $newRow TableRow */
            foreach ($newRows as $newIndex => $newRow) {
                $score = $this->getMatchPercentage($oldRow, $newRow, $oldIndex, $newIndex);

                // The inner arrays of $scoresByNewIndex are created on first write.
                $scoresByOldIndex[$oldIndex][$newIndex] = $score;
                $scoresByNewIndex[$newIndex][$oldIndex] = $score;
            }
        }

        $rowMatches = $this->getRowMatches($scoresByOldIndex, $scoresByNewIndex);
        $this->diffTableRowsWithMatches($oldRows, $newRows, $rowMatches);

        $this->content = $this->htmlFromNode($this->diffTable);
    }
123
124
    /**
     * Turns the row matches into diff operations and applies them in order.
     *
     * @param TableRow[] $oldRows
     * @param TableRow[] $newRows
     * @param RowMatch[] $matches
     */
    protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
    {
        $operations = $this->buildRowOperations($matches, count($oldRows), count($newRows));

        $this->applyRowOperations($operations, $oldRows, $newRows);
    }

    /**
     * Converts the ordered matches into a list of operations.
     *
     * Each match yields an 'equal' operation, preceded by an insert/delete/replace
     * operation covering the unmatched rows between the previous match and it.
     *
     * @param RowMatch[] $matches
     * @param int        $oldRowCount
     * @param int        $newRowCount
     *
     * @return Operation[]
     */
    private function buildRowOperations($matches, $oldRowCount, $newRowCount)
    {
        // Zero-length sentinel match at the end of both tables, so rows after the
        // last real match are emitted as a trailing operation too.
        $matches[] = new RowMatch($newRowCount, $oldRowCount, $newRowCount, $oldRowCount);

        $operations = array();
        $indexInOld = 0;
        $indexInNew = 0;

        foreach ($matches as $match) {
            $action = $this->resolveGapAction(
                $indexInOld === $match->getStartInOld(),
                $indexInNew === $match->getStartInNew()
            );

            if ($action !== 'equal') {
                $operations[] = new Operation(
                    $action,
                    $indexInOld,
                    $match->getStartInOld(),
                    $indexInNew,
                    $match->getStartInNew()
                );
            }

            $operations[] = new Operation(
                'equal',
                $match->getStartInOld(),
                $match->getEndInOld(),
                $match->getStartInNew(),
                $match->getEndInNew()
            );

            $indexInOld = $match->getEndInOld();
            $indexInNew = $match->getEndInNew();
        }

        return $operations;
    }

    /**
     * Names the operation needed to bridge the gap in front of a match.
     *
     * @param bool $oldAtMatch Whether the old cursor already sits at the match start.
     * @param bool $newAtMatch Whether the new cursor already sits at the match start.
     *
     * @return string One of 'equal', 'insert', 'delete' or 'replace'.
     */
    private function resolveGapAction($oldAtMatch, $newAtMatch)
    {
        if ($oldAtMatch && $newAtMatch) {
            return 'equal';
        }

        if ($oldAtMatch) {
            // Only the new side has unmatched rows.
            return 'insert';
        }

        if ($newAtMatch) {
            // Only the old side has unmatched rows.
            return 'delete';
        }

        return 'replace';
    }

    /**
     * Dispatches every operation to its process*Operation() handler.
     *
     * Operations with an action other than the four known ones are skipped.
     *
     * @param Operation[] $operations
     * @param TableRow[]  $oldRows
     * @param TableRow[]  $newRows
     */
    private function applyRowOperations($operations, $oldRows, $newRows)
    {
        // Shared by reference across all handlers for the whole table.
        $appliedRowSpans = array();

        foreach ($operations as $operation) {
            switch ($operation->action) {
                case 'equal':
                    $this->processEqualOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                    break;

                case 'delete':
                    $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans);
                    break;

                case 'insert':
                    $this->processInsertOperation($operation, $newRows, $appliedRowSpans);
                    break;

                case 'replace':
                    $this->processReplaceOperation($operation, $oldRows, $newRows, $appliedRowSpans);
                    break;
            }
        }
    }
201
202
    /**
     * Appends every new row covered by the operation as a row without an old
     * counterpart.
     *
     * @param Operation $operation       Supplies startInNew / endInNew.
     * @param array     $newRows
     * @param array     $appliedRowSpans Passed on by reference to diffAndAppendRows().
     * @param bool      $forceExpansion
     */
    protected function processInsertOperation(
        Operation $operation,
        $newRows,
        &$appliedRowSpans,
        $forceExpansion = false
    ) {
        $offset = $operation->startInNew;
        $length = $operation->endInNew - $offset;

        foreach (array_slice($newRows, $offset, $length) as $insertedRow) {
            $this->diffAndAppendRows(null, $insertedRow, $appliedRowSpans, $forceExpansion);
        }
    }
219
220
    /**
     * Appends every old row covered by the operation as a row without a new
     * counterpart.
     *
     * @param Operation $operation       Supplies startInOld / endInOld.
     * @param array     $oldRows
     * @param array     $appliedRowSpans Passed on by reference to diffAndAppendRows().
     * @param bool      $forceExpansion
     */
    protected function processDeleteOperation(
        Operation $operation,
        $oldRows,
        &$appliedRowSpans,
        $forceExpansion = false
    ) {
        $offset = $operation->startInOld;
        $length = $operation->endInOld - $offset;

        foreach (array_slice($oldRows, $offset, $length) as $deletedRow) {
            $this->diffAndAppendRows($deletedRow, null, $appliedRowSpans, $forceExpansion);
        }
    }
237
238
    /**
     * Diffs the old and new rows covered by the operation pairwise.
     *
     * Both slices are re-indexed from zero; a new row is only processed when an
     * old row exists at the same offset.
     *
     * @param Operation $operation
     * @param array     $oldRows
     * @param array     $newRows
     * @param array     $appliedRowSpans Passed on by reference to diffAndAppendRows().
     */
    protected function processEqualOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
    {
        $oldSlice = array_values(
            array_slice($oldRows, $operation->startInOld, $operation->endInOld - $operation->startInOld)
        );
        $newSlice = array_values(
            array_slice($newRows, $operation->startInNew, $operation->endInNew - $operation->startInNew)
        );

        foreach ($newSlice as $offset => $newRow) {
            if (isset($oldSlice[$offset])) {
                $this->diffAndAppendRows($oldSlice[$offset], $newRow, $appliedRowSpans);
            }
        }
    }
261
262
    /**
     * Handles a replacement as a delete of the old rows followed by an insert of
     * the new rows, both with forced expansion.
     *
     * @param Operation $operation
     * @param array     $oldRows
     * @param array     $newRows
     * @param array     $appliedRowSpans Passed on by reference to both handlers.
     */
    protected function processReplaceOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
    {
        $forceExpansion = true;

        $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans, $forceExpansion);
        $this->processInsertOperation($operation, $newRows, $appliedRowSpans, $forceExpansion);
    }
273
274
    /**
     * Collects the row matches for the complete old and new row ranges.
     *
     * @param array $oldMatchData Only its element count is used (end of the old range).
     * @param array $newMatchData Scores keyed by new index, then old index.
     *
     * @return array Matches in the order findRowMatches() appended them.
     */
    protected function getRowMatches($oldMatchData, $newMatchData)
    {
        $collected = array();

        // Search the full range: [0, count) on both sides.
        $this->findRowMatches($newMatchData, 0, count($oldMatchData), 0, count($newMatchData), $collected);

        return $collected;
    }
293
294
    /**
     * Recursively collects row matches inside the given old/new ranges.
     *
     * The best match of the range is located first; the ranges before it and after
     * it are then searched recursively, so matches are appended in ascending order.
     * A side range is only searched when it is non-empty on both the old and the
     * new side.
     *
     * @param array $newMatchData
     * @param int   $startInOld
     * @param int   $endInOld
     * @param int   $startInNew
     * @param int   $endInNew
     * @param array $matches      Receives the matches (by reference).
     */
    protected function findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, &$matches)
    {
        $pivot = $this->findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew);
        if ($pivot === null) {
            return;
        }

        $hasRowsBefore = $startInOld < $pivot->getStartInOld() && $startInNew < $pivot->getStartInNew();
        if ($hasRowsBefore) {
            $this->findRowMatches(
                $newMatchData,
                $startInOld,
                $pivot->getStartInOld(),
                $startInNew,
                $pivot->getStartInNew(),
                $matches
            );
        }

        $matches[] = $pivot;

        $hasRowsAfter = $pivot->getEndInOld() < $endInOld && $pivot->getEndInNew() < $endInNew;
        if ($hasRowsAfter) {
            $this->findRowMatches(
                $newMatchData,
                $pivot->getEndInOld(),
                $endInOld,
                $pivot->getEndInNew(),
                $endInNew,
                $matches
            );
        }
    }
335
336
    /**
     * Finds the highest-scoring old/new row pair inside the given ranges.
     *
     * Only scores strictly greater than zero qualify, and on ties the pair that is
     * encountered first wins. Iteration stops at the first index at or beyond the
     * range end, so the score tables are expected to be keyed in ascending order.
     *
     * @param array $newMatchData Scores keyed by new index, then old index.
     * @param int   $startInOld
     * @param int   $endInOld
     * @param int   $startInNew
     * @param int   $endInNew
     *
     * @return RowMatch|null A single-row match, or null when nothing scored above zero.
     */
    protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew)
    {
        $found = false;
        $topScore = 0;
        $topOldIndex = null;
        $topNewIndex = null;

        foreach ($newMatchData as $newIndex => $scoresByOldIndex) {
            if ($newIndex < $startInNew) {
                continue;
            }
            if ($newIndex >= $endInNew) {
                break;
            }

            foreach ($scoresByOldIndex as $oldIndex => $score) {
                if ($oldIndex < $startInOld) {
                    continue;
                }
                if ($oldIndex >= $endInOld) {
                    break;
                }

                if ($score > $topScore) {
                    $found = true;
                    $topScore = $score;
                    $topOldIndex = $oldIndex;
                    $topNewIndex = $newIndex;
                }
            }
        }

        if (!$found) {
            return null;
        }

        return new RowMatch($topNewIndex, $topOldIndex, $topNewIndex + 1, $topOldIndex + 1, $topScore);
    }
390
391
    /**
     * Diffs one old row against one new row and returns the resulting <tr> nodes.
     *
     * Either row may be null (pure insert or pure delete), but not both. When the
     * colspans of paired cells differ, a second "extra" row is created; the cells
     * of the side with the smaller colspan are placed there.
     *
     * @param TableRow|null $oldRow
     * @param TableRow|null $newRow
     * @param array         $appliedRowSpans Cells spanning multiple rows from earlier calls;
     *                                       updated in place for the next call.
     * @param bool          $forceExpansion  When true, the rowspan of every cell in
     *                                       $appliedRowSpans is increased and the list is
     *                                       not shifted.
     *
     * @return array Two elements: the diffed row and the extra row (or null).
     *
     * @throws \InvalidArgumentException When both rows are null.
     */
    protected function diffRows($oldRow, $newRow, array &$appliedRowSpans, $forceExpansion = false)
    {
        // create tr dom element
        $rowToClone = $newRow ?: $oldRow;
        if (!$rowToClone) {
            // Previously this ended in a fatal "member function on null" error.
            throw new \InvalidArgumentException('At least one of $oldRow and $newRow must be provided.');
        }
        /* @var $diffRow \DOMElement */
        $diffRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);

        $oldCells = $oldRow ? $oldRow->getCells() : array();
        $newCells = $newRow ? $newRow->getCells() : array();

        $position = new DiffRowPosition();

        $extraRow = null;

        /* @var $expandCells \DOMElement[] */
        $expandCells = array();
        /* @var $cellsWithMultipleRows \DOMElement[] */
        $cellsWithMultipleRows = array();

        $newCellCount = count($newCells);
        while ($position->getIndexInNew() < $newCellCount) {
            if (!$position->areColumnsEqual()) {
                $type = $position->getLesserColumnType();
                if ($type === 'new') {
                    $row = $newRow;
                    $targetRow = $extraRow;
                } else {
                    $row = $oldRow;
                    $targetRow = $diffRow;
                }
                // The original condition read `!$type === 'old'`, which PHP parses as
                // `(!$type) === 'old'` and is therefore always false. The intent is:
                // the new side always has a current cell here (loop condition), while
                // the old side must be checked for a remaining cell.
                if ($row && $targetRow && ($type !== 'old' || isset($oldCells[$position->getIndexInOld()]))) {
                    $this->syncVirtualColumns($row, $position, $cellsWithMultipleRows, $targetRow, $type, true);

                    continue;
                }
            }

            /* @var $newCell TableCell */
            $newCell = $newCells[$position->getIndexInNew()];
            /* @var $oldCell TableCell */
            $oldCell = isset($oldCells[$position->getIndexInOld()]) ? $oldCells[$position->getIndexInOld()] : null;

            if ($oldCell && $newCell->getColspan() != $oldCell->getColspan()) {
                if (null === $extraRow) {
                    /* @var $extraRow \DOMElement */
                    $extraRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);
                }

                // $oldRow and $newRow are both non-null in this branch: $oldCell and
                // $newCell were read from their cell lists.
                if ($oldCell->getColspan() > $newCell->getColspan()) {
                    $this->diffCellsAndIncrementCounters(
                        $oldCell,
                        null,
                        $cellsWithMultipleRows,
                        $diffRow,
                        $position,
                        true
                    );
                    $this->syncVirtualColumns($newRow, $position, $cellsWithMultipleRows, $extraRow, 'new', true);
                } else {
                    $this->diffCellsAndIncrementCounters(
                        null,
                        $newCell,
                        $cellsWithMultipleRows,
                        $extraRow,
                        $position,
                        true
                    );
                    $this->syncVirtualColumns($oldRow, $position, $cellsWithMultipleRows, $diffRow, 'old', true);
                }
            } else {
                $diffCell = $this->diffCellsAndIncrementCounters(
                    $oldCell,
                    $newCell,
                    $cellsWithMultipleRows,
                    $diffRow,
                    $position
                );
                $expandCells[] = $diffCell;
            }
        }

        // Old cells left over after the new cells ran out.
        $oldCellCount = count($oldCells);
        while ($position->getIndexInOld() < $oldCellCount) {
            $diffCell = $this->diffCellsAndIncrementCounters(
                $oldCells[$position->getIndexInOld()],
                null,
                $cellsWithMultipleRows,
                $diffRow,
                $position
            );
            $expandCells[] = $diffCell;
        }

        // Cells paired normally must also cover the extra row.
        if ($extraRow) {
            foreach ($expandCells as $expandCell) {
                $rowspan = $expandCell->getAttribute('rowspan') ?: 1;
                $expandCell->setAttribute('rowspan', 1 + $rowspan);
            }
        }

        // Cells from earlier rows that still span into this row grow by one row.
        if ($extraRow || $forceExpansion) {
            foreach ($appliedRowSpans as $rowSpanCells) {
                /* @var $rowSpanCells \DOMElement[] */
                foreach ($rowSpanCells as $extendCell) {
                    $rowspan = $extendCell->getAttribute('rowspan') ?: 1;
                    $extendCell->setAttribute('rowspan', 1 + $rowspan);
                }
            }
        }

        if (!$forceExpansion) {
            array_shift($appliedRowSpans);
            $appliedRowSpans = array_values($appliedRowSpans);
        }
        $appliedRowSpans = array_merge($appliedRowSpans, array_values($cellsWithMultipleRows));

        return array($diffRow, $extraRow);
    }
517
518
    /**
     * Creates the (still empty) output cell node for a pair of cells.
     *
     * With a single cell, a shallow clone of that cell's node is used. With both
     * cells, the new cell's node is cloned and its rowspan/colspan attributes are
     * set to the larger value of each pair (a missing attribute counts as 1).
     *
     * @param TableCell|null $oldCell
     * @param TableCell|null $newCell
     *
     * @return \DOMElement The clone imported into the diff document.
     *
     * @throws \InvalidArgumentException When both cells are null.
     */
    protected function getNewCellNode(TableCell $oldCell = null, TableCell $newCell = null)
    {
        if (!$oldCell && !$newCell) {
            // Previously this ended in a fatal "member function on null" error.
            throw new \InvalidArgumentException('Either $oldCell or $newCell must be provided.');
        }

        // If only one cell exists, use it
        if (!$oldCell || !$newCell) {
            $sourceCell = $newCell ?: $oldCell;
            $clone = $sourceCell->getDomNode()->cloneNode(false);
        } else {
            $oldNode = $oldCell->getDomNode();
            $newNode = $newCell->getDomNode();

            /* @var $clone \DOMElement */
            $clone = $newNode->cloneNode(false);

            $oldRowspan = $oldNode->getAttribute('rowspan') ?: 1;
            $oldColspan = $oldNode->getAttribute('colspan') ?: 1;
            $newRowspan = $newNode->getAttribute('rowspan') ?: 1;
            $newColspan = $newNode->getAttribute('colspan') ?: 1;

            $clone->setAttribute('rowspan', max($oldRowspan, $newRowspan));
            $clone->setAttribute('colspan', max($oldColspan, $newColspan));
        }

        return $this->diffDom->importNode($clone);
    }
549
550
    /**
     * Diffs the contents of two cells and returns the resulting cell node.
     *
     * The inner HTML of both cells (an empty string for a missing cell) is diffed
     * with HtmlDiff using this instance's encoding, special-case tags, grouping
     * flag and match threshold. The cell's class attribute then receives 'del'
     * when there is no new cell, 'ins' when there is no old cell, and 'extra-row'
     * when $usingExtraRow is set.
     *
     * @param TableCell|null $oldCell
     * @param TableCell|null $newCell
     * @param bool           $usingExtraRow
     *
     * @return \DOMElement
     */
    protected function diffCells($oldCell, $newCell, $usingExtraRow = false)
    {
        $diffCell = $this->getNewCellNode($oldCell, $newCell);

        $oldContent = '';
        if ($oldCell) {
            $oldContent = $this->getInnerHtml($oldCell->getDomNode());
        }

        $newContent = '';
        if ($newCell) {
            $newContent = $this->getInnerHtml($newCell->getDomNode());
        }

        $contentDiff = new HtmlDiff(
            mb_convert_encoding($oldContent, 'UTF-8', 'HTML-ENTITIES'),
            mb_convert_encoding($newContent, 'UTF-8', 'HTML-ENTITIES'),
            $this->encoding,
            $this->specialCaseTags,
            $this->groupDiffs
        );
        $contentDiff->setMatchThreshold($this->matchThreshold);

        $this->setInnerHtml($diffCell, $contentDiff->build());

        // Collect the marker classes in the order they have to be appended.
        $markers = array();
        if (null === $newCell) {
            $markers[] = 'del';
        }
        if (null === $oldCell) {
            $markers[] = 'ins';
        }
        if ($usingExtraRow) {
            $markers[] = 'extra-row';
        }

        foreach ($markers as $marker) {
            $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').' '.$marker));
        }

        return $diffCell;
    }
590
591
    /**
     * Parses the old and the new text into Table structures and stores them in
     * $this->oldTable and $this->newTable.
     */
    protected function buildTableDoms()
    {
        $parsedOld = $this->parseTableStructure($this->oldText);
        $this->oldTable = $parsedOld;

        $parsedNew = $this->parseTableStructure($this->newText);
        $this->newTable = $parsedNew;
    }
596
597
    /**
     * Loads an HTML string into a new DOMDocument.
     *
     * The text is converted to the configured encoding, purified, and converted
     * to HTML entities before it is handed to DOMDocument::loadHTML().
     *
     * @param string $text
     *
     * @return \DOMDocument The loaded document; left empty when there is no markup to load.
     */
    protected function createDocumentWithHtml($text)
    {
        $dom = new \DOMDocument();

        // mb_detect_encoding() returns false when no encoding could be detected;
        // fall back to the configured encoding instead of passing false on.
        $sourceEncoding = mb_detect_encoding($text);
        if ($sourceEncoding === false) {
            $sourceEncoding = $this->encoding;
        }

        $purified = $this->purifier->purify(mb_convert_encoding($text, $this->encoding, $sourceEncoding));
        $markup = mb_convert_encoding($purified, 'HTML-ENTITIES', $this->encoding);

        // DOMDocument::loadHTML does not allow empty strings, and purification can
        // strip the input down to nothing.
        if (is_string($markup) && $markup !== '') {
            $dom->loadHTML($markup);
        }

        return $dom;
    }
613
614
    /**
     * Parses an HTML string into a Table with its rows and cells.
     *
     * Only the first <table> element of the document is used.
     *
     * @param string $text
     *
     * @return Table
     */
    protected function parseTableStructure($text)
    {
        $document = $this->createDocumentWithHtml($text);
        $firstTableNode = $document->getElementsByTagName('table')->item(0);

        $table = new Table($firstTableNode);
        $this->parseTable($table);

        return $table;
    }
631
632
    /**
     * Walks a DOM subtree and registers every <tr> it finds as a row of the table.
     *
     * Children that are not <tr> elements (e.g. <thead>/<tbody>) are searched
     * recursively. The walk does not descend into a <tr> itself; its cells are
     * collected by parseTableRow().
     *
     * @param Table         $table
     * @param \DOMNode|null $node  Subtree root; defaults to the table's own DOM node.
     */
    protected function parseTable(Table $table, \DOMNode $node = null)
    {
        if ($node === null) {
            $node = $table->getDomNode();
        }

        // NOTE(review): the table's DOM node appears to be nullable (no <table> in
        // the input); bail out instead of reading a property of null.
        if ($node === null || !$node->childNodes) {
            return;
        }

        foreach ($node->childNodes as $child) {
            if ($child->nodeName === 'tr') {
                $row = new TableRow($child);
                $table->addRow($row);

                $this->parseTableRow($row);
            } else {
                $this->parseTable($table, $child);
            }
        }
    }
657
658
    /**
     * Adds a TableCell to the row for each direct <td>/<th> child of its DOM node.
     *
     * @param TableRow $row
     */
    protected function parseTableRow(TableRow $row)
    {
        foreach ($row->getDomNode()->childNodes as $child) {
            $tagName = $child->nodeName;
            if ($tagName !== 'td' && $tagName !== 'th') {
                continue;
            }

            $row->addCell(new TableCell($child));
        }
    }
672
673
    /**
     * Returns the concatenated HTML of all child nodes of a node.
     *
     * @param \DOMNode $node
     *
     * @return string An empty string when the node has no children.
     */
    protected function getInnerHtml($node)
    {
        $parts = array();

        foreach ($node->childNodes as $child) {
            $parts[] = $this->htmlFromNode($child);
        }

        return implode('', $parts);
    }
689
690
    /**
     * Serialises a node, including its descendants, to HTML.
     *
     * The node is deep-imported into a temporary document, which is then saved
     * as HTML.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function htmlFromNode($node)
    {
        $scratch = new \DOMDocument();
        $scratch->appendChild($scratch->importNode($node, true));

        return $scratch->saveHTML();
    }
703
704
    /**
     * Appends the nodes parsed from an HTML string to the given node.
     *
     * Existing children of the node are kept; the parsed nodes are added after
     * them. An empty string is replaced by an empty placeholder <span>.
     *
     * @param \DOMNode $node
     * @param string   $html
     */
    protected function setInnerHtml($node, $html)
    {
        // DOMDocument::loadHTML does not allow empty strings.
        if (strlen($html) === 0) {
            $html = '<span class="empty"></span>';
        }

        $doc = $this->createDocumentWithHtml($html);

        // Without a <body> there is nothing to copy over.
        $root = $doc->getElementsByTagName('body')->item(0);
        if ($root === null) {
            return;
        }

        $fragment = $node->ownerDocument->createDocumentFragment();
        foreach ($root->childNodes as $child) {
            $fragment->appendChild($node->ownerDocument->importNode($child, true));
        }

        // Skip the append when the fragment is empty; there is nothing to add.
        if ($fragment->hasChildNodes()) {
            $node->appendChild($fragment);
        }
    }
724
725
    /**
     * Indexes every cell of the table by its trimmed text content.
     *
     * For each cell a TablePosition(row index, cell index) is appended to
     * $this->cellValues under the cell's text.
     *
     * @param Table $table
     */
    protected function indexCellValues(Table $table)
    {
        foreach ($table->getRows() as $rowIndex => $row) {
            foreach ($row->getCells() as $cellIndex => $cell) {
                $text = trim($cell->getDomNode()->textContent);

                // The list for a text is created implicitly on first append.
                $this->cellValues[$text][] = new TablePosition($rowIndex, $cellIndex);
            }
        }
    }
742
743
    /**
     * Emits cells from one side of the diff while that side's column is behind
     * the other side's.
     *
     * Each emitted cell is diffed against nothing (as an insert for 'new', as a
     * delete otherwise), appended to $diffRow, and the position advances by the
     * cell's colspan and by one index. The loop also ends when the row has no
     * further cell.
     *
     * @param TableRow        $tableRow
     * @param DiffRowPosition $position
     * @param array           $cellsWithMultipleRows Receives cells whose rowspan exceeds 1,
     *                                               grouped by rowspan value (by reference).
     * @param \DOMNode        $diffRow
     * @param string          $diffType              'new' or 'old'.
     * @param bool            $usingExtraRow
     */
    protected function syncVirtualColumns(
        $tableRow,
        DiffRowPosition $position,
        &$cellsWithMultipleRows,
        $diffRow,
        $diffType,
        $usingExtraRow = false
    ) {
        $isNewSide = $diffType === 'new';
        $cell = $tableRow->getCell($position->getIndex($diffType));

        while ($position->isColumnLessThanOther($diffType) && $cell) {
            if ($isNewSide) {
                $diffCell = $this->diffCells(null, $cell, $usingExtraRow);
            } else {
                $diffCell = $this->diffCells($cell, null, $usingExtraRow);
            }

            // Store cell in appliedRowSpans if spans multiple rows
            $rowspan = $diffCell->getAttribute('rowspan');
            if ($rowspan > 1) {
                $cellsWithMultipleRows[$rowspan][] = $diffCell;
            }

            $diffRow->appendChild($diffCell);

            $position->incrementColumn($diffType, $cell->getColspan());
            $cell = $tableRow->getCell($position->incrementIndex($diffType));
        }
    }
775
776
    /**
     * Diffs a pair of cells, appends the result to a row and advances the position.
     *
     * For every cell that is present, the matching side of the position moves
     * forward by one index and by the cell's colspan.
     *
     * @param null|TableCell  $oldCell
     * @param null|TableCell  $newCell
     * @param array           $cellsWithMultipleRows Receives the cell when its rowspan exceeds 1,
     *                                               grouped by rowspan value (by reference).
     * @param \DOMElement     $diffRow
     * @param DiffRowPosition $position
     * @param bool            $usingExtraRow
     *
     * @return \DOMElement The diffed cell that was appended.
     */
    protected function diffCellsAndIncrementCounters(
        $oldCell,
        $newCell,
        &$cellsWithMultipleRows,
        $diffRow,
        DiffRowPosition $position,
        $usingExtraRow = false
    ) {
        $diffCell = $this->diffCells($oldCell, $newCell, $usingExtraRow);

        // Store cell in appliedRowSpans if spans multiple rows
        $rowspan = $diffCell->getAttribute('rowspan');
        if ($rowspan > 1) {
            $cellsWithMultipleRows[$rowspan][] = $diffCell;
        }

        $diffRow->appendChild($diffCell);

        if (null !== $newCell) {
            $position->incrementIndexInNew();
            $position->incrementColumnInNew($newCell->getColspan());
        }

        if (null !== $oldCell) {
            $position->incrementIndexInOld();
            $position->incrementColumnInOld($oldCell->getColspan());
        }

        return $diffCell;
    }
813
814
    /**
     * Diffs a pair of rows and appends the result to the diff table.
     *
     * The extra row returned by diffRows(), if any, is appended right after the
     * main row.
     *
     * @param TableRow|null $oldRow
     * @param TableRow|null $newRow
     * @param array         $appliedRowSpans Passed on by reference to diffRows().
     * @param bool          $forceExpansion
     */
    protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $forceExpansion = false)
    {
        $diffedRows = $this->diffRows($oldRow, $newRow, $appliedRowSpans, $forceExpansion);

        $mainRow = $diffedRows[0];
        $extraRow = $diffedRows[1];

        $this->diffTable->appendChild($mainRow);

        if ($extraRow) {
            $this->diffTable->appendChild($extraRow);
        }
    }
835
836
    /**
     * Scores how well an old row matches a new row.
     *
     * Cells are compared position by position with similar_text(). A cell only
     * contributes when its similarity exceeds half of the match threshold; a
     * first cell with more than 95 % similarity counts 1.5 times. The sum is
     * divided by a total that grows with the smaller cell count and with the
     * distance between the two row indexes.
     *
     * @param TableRow $oldRow
     * @param TableRow $newRow
     * @param int      $oldIndex
     * @param int      $newIndex
     *
     * @return float|int
     */
    protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow, $oldIndex, $newIndex)
    {
        $firstCellWeight = 1.5;
        $indexDeltaWeight = 0.25 * (abs($oldIndex - $newIndex));

        $newCells = $newRow->getCells();
        $minCells = min(count($newCells), count($oldRow->getCells()));
        $totalCount = ($minCells + $firstCellWeight + $indexDeltaWeight) * 100;

        $minimumSimilarity = $this->matchThreshold * 0.50;
        $thresholdCount = 0;

        foreach ($newCells as $cellIndex => $newCell) {
            $oldCell = $oldRow->getCell($cellIndex);
            if (!$oldCell) {
                continue;
            }

            // similar_text() writes the similarity percentage into its third argument.
            $similarity = null;
            similar_text($oldCell->getInnerHtml(), $newCell->getInnerHtml(), $similarity);

            if ($similarity > $minimumSimilarity) {
                $isStrongFirstCell = $cellIndex === 0 && $similarity > 95;
                $thresholdCount += $isStrongFirstCell ? $similarity * $firstCellWeight : $similarity;
            }
        }

        if ($totalCount > 0) {
            return $thresholdCount / $totalCount;
        }

        return 0;
    }
870
}
871