Test Failed
Pull Request — master (#49)
by Josh
04:36
created

ListDiffLines::build()   C

Complexity

Conditions 11
Paths 1

Size

Total Lines 56
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 11
eloc 32
c 1
b 0
f 1
nc 1
nop 0
dl 0
loc 56
rs 6.5481

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Sunra\PhpSimple\HtmlDomParser;
6
7
class ListDiffLines extends ListDiff
8
{
9
    const CLASS_LIST_ITEM_ADDED = 'normal new';
10
    const CLASS_LIST_ITEM_DELETED = 'removed';
11
    const CLASS_LIST_ITEM_CHANGED = 'replacement';
12
    const CLASS_LIST_ITEM_NONE = 'normal';
13
14
    protected static $containerTags = array('html', 'body', 'p', 'blockquote',
15
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
16
        'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
17
        'dt', 'dd', 'input', 'form', 'img', 'span', 'a');
18
    protected static $styleTags = array('i', 'b', 'strong', 'em', 'font',
19
        'big', 'del', 'tt', 'sub', 'sup', 'strike');
20
21
    protected static $listContentTags = array(
22
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'br', 'hr', 'code', 'input',
23
        'form', 'img', 'span', 'a', 'i', 'b', 'strong', 'em', 'font', 'big',
24
        'del', 'tt', 'sub', 'sup', 'strike',
25
    );
26
27
    /**
28
     * @var LcsService
29
     */
30
    protected $lcsService;
31
32
    /**
     * Named constructor: instantiates the differ for a pair of HTML fragments
     * and, when a configuration object is supplied, applies it via setConfig().
     *
     * @param string              $oldText
     * @param string              $newText
     * @param HtmlDiffConfig|null $config  Left untouched on the instance when null.
     *
     * @return ListDiffLines
     */
    public static function create($oldText, $newText, HtmlDiffConfig $config = null)
    {
        $instance = new self($oldText, $newText);

        if ($config === null) {
            return $instance;
        }

        $instance->setConfig($config);

        return $instance;
    }
49
50
    /**
     * Builds the item-by-item diff of the old and new list markup.
     *
     * Installs an LcsService whose comparator decides when two list items are
     * similar enough to be treated as the same item, then hands both texts to
     * listByLines().
     *
     * @return string Markup produced by listByLines().
     */
    public function build()
    {
        $threshold = $this->config->getMatchThreshold();

        $this->lcsService = new LcsService(function ($a, $b) use ($threshold) {
            return self::listItemsMatch($a, $b, $threshold);
        });

        return $this->listByLines($this->oldText, $this->newText);
    }

    /**
     * Decides whether two list item strings should be paired up by the LCS pass.
     *
     * Checks are tried from cheapest/most common to most lenient: similarity of
     * the tag-stripped text, similarity of the raw markup, and finally how much
     * of the shorter item is covered by a shared prefix and suffix.
     *
     * @param string    $a         Markup of the old list item.
     * @param string    $b         Markup of the new list item.
     * @param int|float $threshold Minimum similar_text() percentage to count as a match.
     *
     * @return bool
     */
    private static function listItemsMatch($a, $b, $threshold)
    {
        $aStripped = strip_tags($a);
        $bStripped = strip_tags($b);

        // The raw-markup comparison only runs when the stripped one falls short.
        if (self::similarityPercent($aStripped, $bStripped) >= $threshold
            || self::similarityPercent($a, $b) >= $threshold
        ) {
            return true;
        }

        return self::affixesCoverShorterItem($a, $b, $aStripped, $bStripped);
    }

    /**
     * Returns the similarity percentage reported by similar_text().
     *
     * @param string $a
     * @param string $b
     *
     * @return float
     */
    private static function similarityPercent($a, $b)
    {
        $percent = null;
        similar_text($a, $b, $percent);

        return $percent;
    }

    /**
     * Checks whether the common prefix and suffix of two items account for most
     * of the shorter one.
     *
     * The trimmed, tag-stripped text is compared; if either side has no text
     * left after stripping, the raw markup is compared instead.
     *
     * @param string $a         Raw markup of the old item.
     * @param string $b         Raw markup of the new item.
     * @param string $aStripped $a with tags removed.
     * @param string $bStripped $b with tags removed.
     *
     * @return bool
     */
    private static function affixesCoverShorterItem($a, $b, $aStripped, $bStripped)
    {
        $aCleaned = trim($aStripped);
        $bCleaned = trim($bStripped);
        if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
            $aCleaned = $a;
            $bCleaned = $b;
        }
        if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
            return false;
        }

        $prefixLength = Preprocessor::diffCommonPrefix($aCleaned, $bCleaned);
        $suffixLength = Preprocessor::diffCommonSuffix($aCleaned, $bCleaned);

        // Work from the shorter string and see how much of it is left uncovered.
        // Both cleaned strings are non-empty here, so neither divisor can be zero.
        $shorterLength = min(strlen($aCleaned), strlen($bCleaned));
        $uncovered = $shorterLength - ($prefixLength + $suffixLength);
        $lengthRatio = $shorterLength / max(strlen($a), strlen($b));

        // An exact cover ($uncovered === 0) gives a ratio of 0 and so needs no
        // separate branch: it is already accepted by the "< 0.4" test.
        return $lengthRatio > 0.1 && ($uncovered / $shorterLength) < 0.4;
    }
106
107
    /**
     * Looks up a list element inside the given DOM (or DOM node).
     *
     * The selector is the comma-separated set of tag names in static::$listTypes,
     * and index 0 is passed to find() to request a single match rather than
     * the whole result set.
     *
     * @param \simple_html_dom|\simple_html_dom_node $dom
     *
     * @return \simple_html_dom_node[]|\simple_html_dom_node|null
     */
    protected function findListNode($dom)
    {
        $selector = implode(', ', static::$listTypes);

        return $dom->find($selector, 0);
    }
116
117
    /**
     * Parses both HTML fragments, locates the list element in each and renders
     * the item-level diff between them.
     *
     * @param string $old Old HTML containing the list.
     * @param string $new New HTML containing the list.
     *
     * @return string Diffed list markup, as returned by processOperations().
     *
     * @throws \InvalidArgumentException When either fragment has no list element to compare.
     */
    protected function listByLines($old, $new)
    {
        /* @var $newDom \simple_html_dom */
        $newDom = HtmlDomParser::str_get_html($new);
        /* @var $oldDom \simple_html_dom */
        $oldDom = HtmlDomParser::str_get_html($old);

        // NOTE(review): the parser presumably returns a non-object on failure
        // (e.g. empty input) - confirm; guarding here avoids calling find() on it.
        $newListNode = is_object($newDom) ? $this->findListNode($newDom) : null;
        $oldListNode = is_object($oldDom) ? $this->findListNode($oldDom) : null;

        // findListNode() is documented to return null (or an array) as well as a
        // node; everything below calls node methods, so fail early and clearly
        // instead of crashing on a method call against a non-object.
        if (!is_object($oldListNode) || !is_object($newListNode)) {
            throw new \InvalidArgumentException(
                'Both the old and the new text must contain a list element to diff.'
            );
        }

        $operations = $this->getListItemOperations($oldListNode, $newListNode);

        return $this->processOperations($operations, $oldListNode, $newListNode);
    }
131
132
    /**
     * Turns the LCS matching between the old and new list items into a list of
     * ADDED / DELETED / CHANGED operations describing the gaps between matches.
     *
     * @param \simple_html_dom_node $oldListNode
     * @param \simple_html_dom_node $newListNode
     *
     * @return array|Operation[]
     */
    protected function getListItemOperations($oldListNode, $newListNode)
    {
        // Flatten each list into an array of item strings for the LCS service.
        $oldItems = $this->getListTextArray($oldListNode);
        $newItems = $this->getListTextArray($newListNode);

        // NOTE(review): judging by how it is consumed below, the result maps an
        // old item position to the position of its match in the new list, with
        // 0 meaning "no match" - confirm against LcsService.
        $matches = $this->lcsService->longestCommonSubsequence($oldItems, $newItems);

        // Sentinel match just past the end of both lists, so that a trailing
        // gap is reported like any other gap.
        $matches[count($oldItems) + 1] = count($newItems) + 1;

        $operations = [];
        $lineInOld = 0;
        $lineInNew = 0;

        foreach ($matches as $i => $match) {
            if ($match === 0) {
                continue;
            }

            $nextOld = $lineInOld + 1;
            $nextNew = $lineInNew + 1;

            if ($match > $nextNew && $i === $nextOld) {
                // Only the new list has items in the gap: they were added.
                $operations[] = new Operation(Operation::ADDED, $lineInOld, $lineInOld, $nextNew, $match - 1);
            } elseif ($i > $nextOld && $match === $nextNew) {
                // Only the old list has items in the gap: they were deleted.
                $operations[] = new Operation(Operation::DELETED, $nextOld, $i - 1, $lineInNew, $lineInNew);
            } elseif ($match !== $nextNew && $i !== $nextOld) {
                // Both lists have items in the gap: treat them as a change.
                $operations[] = new Operation(
                    Operation::CHANGED,
                    $nextOld,
                    $i - 1,
                    $nextNew,
                    $match - 1
                );
            }

            $lineInNew = $match;
            $lineInOld = $i;
        }

        return $operations;
    }
174
175
    /**
     * Collects the comparable text of every direct child of a list node.
     *
     * @param \simple_html_dom_node $listNode
     *
     * @return array One getRelevantNodeText() result per child, in child order.
     */
    protected function getListTextArray($listNode)
    {
        $texts = array();

        foreach ($listNode->children() as $item) {
            $texts[] = $this->getRelevantNodeText($item);
        }

        return $texts;
    }
184
185
    /**
     * Builds the markup of a node that is relevant for comparing list items.
     *
     * A node without child nodes contributes its inner text as is. Otherwise
     * each entry of $node->nodes is handled in turn: leaves are appended with
     * their outer text, and children whose tag is listed in
     * static::$listContentTags are rebuilt as a bare tag (attributes are not
     * copied) around their own relevant text. Any other child that has child
     * nodes - a nested ul/ol, for instance - is left out.
     *
     * @param \simple_html_dom_node $node
     *
     * @return string
     */
    protected function getRelevantNodeText(\simple_html_dom_node $node)
    {
        if (!$node->hasChildNodes()) {
            return $node->innertext();
        }

        $output = '';
        foreach ($node->nodes as $child) {
            /* @var $child \simple_html_dom_node */
            if (!$child->hasChildNodes()) {
                $output .= $child->outertext();
                continue;
            }

            $tag = $child->nodeName();
            // Strict comparison: tag names are strings, so no type juggling is wanted.
            if (in_array($tag, static::$listContentTags, true)) {
                $output .= sprintf('<%1$s>%2$s</%1$s>', $tag, $this->getRelevantNodeText($child));
            }
        }

        return $output;
    }
203
204
    /**
     * Marks a list item as deleted: appends the "removed" class to its class
     * attribute and wraps its content in a <del> element.
     *
     * The node passed in is modified in place.
     *
     * @param \simple_html_dom_node $li
     *
     * @return string Outer markup of the modified item.
     */
    protected function deleteListItem($li)
    {
        $classes = trim($li->getAttribute('class').' '.self::CLASS_LIST_ITEM_DELETED);

        $li->setAttribute('class', $classes);
        $li->innertext = sprintf('<del>%s</del>', $li->innertext);

        return $li->outertext;
    }
214
215
    /**
     * Marks a list item as inserted: appends a class to its class attribute
     * and wraps its content in an <ins> element.
     *
     * The node passed in is modified in place.
     *
     * @param \simple_html_dom_node $li
     * @param bool                  $replacement When true the item is tagged with the
     *                                           "replacement" class instead of "normal new".
     *
     * @return string
     */
    protected function addListItem($li, $replacement = false)
    {
        $marker = $replacement ? self::CLASS_LIST_ITEM_CHANGED : self::CLASS_LIST_ITEM_ADDED;

        $li->setAttribute('class', trim($li->getAttribute('class').' '.$marker));
        $li->innertext = sprintf('<ins>%s</ins>', $li->innertext);

        return $li->outertext;
    }
227
228
    /**
     * Renders the diffed list from the item operations.
     *
     * Walks the old list in order. Items that sit between operations are kept
     * (and diffed against their counterpart in the new list when there is one);
     * items covered by an operation are emitted through deleteListItem() /
     * addListItem(). The combined markup then replaces the content of
     * $newListNode, which also receives the "diff-list" class.
     *
     * Both list nodes and their items are modified in place.
     *
     * @param Operation[]           $operations  Operations from getListItemOperations().
     * @param \simple_html_dom_node $oldListNode
     * @param \simple_html_dom_node $newListNode
     *
     * @return string Outer markup of the rewritten new list node.
     */
    protected function processOperations($operations, $oldListNode, $newListNode)
    {
        $output = '';

        $indexInOld = 0;
        $indexInNew = 0;
        $lastOperation = null;

        foreach ($operations as $operation) {
            $replaced = false;

            // Operation positions are 1-based while the indexes here are 0-based.
            // ADDED operations are created with startInOld (and DELETED ones with
            // startInNew) referring to the item *before* the gap, so those
            // comparisons need no +1.
            $oldOffset = $operation->action === Operation::ADDED ? 0 : 1;
            $newOffset = $operation->action === Operation::DELETED ? 0 : 1;

            // Emit the kept items that precede this operation.
            while ($operation->startInOld > $indexInOld + $oldOffset) {
                $li = $oldListNode->children($indexInOld);
                $matchingLi = $operation->startInNew > $indexInNew + $newOffset
                    ? $newListNode->children($indexInNew)
                    : null;
                if (null !== $matchingLi) {
                    $indexInNew++;
                }

                // Only the first kept item after a deletion is flagged as a replacement.
                $class = self::CLASS_LIST_ITEM_NONE;
                if ($lastOperation === Operation::DELETED && !$replaced) {
                    $class = self::CLASS_LIST_ITEM_CHANGED;
                    $replaced = true;
                }

                $output .= $this->renderKeptListItem($li, $matchingLi, $class);
                $indexInOld++;
            }

            switch ($operation->action) {
                case Operation::ADDED:
                    for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
                        $output .= $this->addListItem($newListNode->children($i - 1));
                    }
                    $indexInNew = $operation->endInNew;
                    break;

                case Operation::DELETED:
                    for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
                        $output .= $this->deleteListItem($oldListNode->children($i - 1));
                    }
                    $indexInOld = $operation->endInOld;
                    break;

                case Operation::CHANGED:
                    // Counts deletions down and insertions back up, so that only as
                    // many insertions as there were deletions are flagged as replacements.
                    $changeDelta = 0;
                    for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
                        $output .= $this->deleteListItem($oldListNode->children($i - 1));
                        $changeDelta--;
                    }
                    for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
                        $output .= $this->addListItem($newListNode->children($i - 1), $changeDelta < 0);
                        $changeDelta++;
                    }
                    $indexInOld = $operation->endInOld;
                    $indexInNew = $operation->endInNew;
                    break;
            }

            $lastOperation = $operation->action;
        }

        // Emit whatever is left of the old list after the last operation.
        $oldCount = count($oldListNode->children());
        $newCount = count($newListNode->children());
        $trailingClass = $lastOperation === Operation::DELETED
            ? self::CLASS_LIST_ITEM_CHANGED
            : self::CLASS_LIST_ITEM_NONE;

        while ($indexInOld < $oldCount) {
            $li = $oldListNode->children($indexInOld);
            $matchingLi = $indexInNew < $newCount ? $newListNode->children($indexInNew) : null;
            if (null !== $matchingLi) {
                $indexInNew++;
            }

            $output .= $this->renderKeptListItem($li, $matchingLi, $trailingClass);
            $indexInOld++;
        }

        $newListNode->innertext = $output;
        $newListNode->setAttribute('class', trim($newListNode->getAttribute('class').' diff-list'));

        return $newListNode->outertext;
    }

    /**
     * Renders an old list item that is kept in the output.
     *
     * When a counterpart from the new list is given, the item's content is
     * replaced by the HtmlDiff of the two contents. The class is appended to
     * the item's class attribute either way.
     *
     * @param \simple_html_dom_node      $li         Item from the old list (modified in place).
     * @param \simple_html_dom_node|null $matchingLi Counterpart from the new list, if any.
     * @param string                     $class      Class name(s) to append.
     *
     * @return string Outer markup of the item.
     */
    private function renderKeptListItem($li, $matchingLi, $class)
    {
        if (null !== $matchingLi) {
            $htmlDiff = HtmlDiff::create($li->innertext, $matchingLi->innertext, $this->config);
            $li->innertext = $htmlDiff->build();
        }

        $li->setAttribute('class', trim($li->getAttribute('class').' '.$class));

        return $li->outertext;
    }
333
}
334