Test Failed
Pull Request — master (#96)
by Sven
02:58
created

ListDiffLines::deleteListItem()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 8
ccs 1
cts 1
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Strategy\ListItemMatchStrategy;
6
use DOMDocument;
7
use DOMDocumentFragment;
8
use DOMElement;
9
use DOMNode;
10
use DOMNodeList;
11
use DOMText;
12
use KubAT\PhpSimple\HtmlDomParser;
13
use LogicException;
14
15
class ListDiffLines extends AbstractDiff
16
{
17
    // Values appended to a node's "class" attribute via appendClassToNode().
    // Applied by addListItem() to an inserted item that is not a replacement.
    private const CLASS_LIST_ITEM_ADDED   = 'normal new';
18
    // Applied by deleteListItem() to a removed item.
    private const CLASS_LIST_ITEM_DELETED = 'removed';
19
    // Applied by addListItem() when $replacement is true, and by processOperations()
    // to carried-over items that follow a DELETED operation.
    private const CLASS_LIST_ITEM_CHANGED = 'replacement';
20
    // Default class processOperations() gives to carried-over items.
    private const CLASS_LIST_ITEM_NONE    = 'normal';
21
22
    // Tag names findListNode() looks for when locating the list to diff.
    protected const LIST_TAG_NAMES = ['ul', 'ol', 'dl'];
23
24
    /**
25
     * List of tags that should be included when retrieving
26
     * text from a single list item that will be used in
27
     * matching logic (and only in matching logic).
28
     *
29
     * @see getRelevantNodeText()
30
     *
31
     * @var array
32
     */
33
    protected static $listContentTags = [
34
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'br', 'hr', 'code',
35
        'input', 'form', 'img', 'span', 'a', 'i', 'b', 'strong', 'em',
36
        'font', 'big', 'del', 'tt', 'sub', 'sup', 'strike',
37
    ];
38
39
    /**
     * LCS service used by getListItemOperations() to match the text of old
     * and new list items. Assigned in build() on a cache miss, wrapping a
     * ListItemMatchStrategy.
     *
     * @var LcsService
     */
42
    protected $lcsService;
43
44 7
    /**
     * Named constructor: instantiates the diff for the two inputs and, when a
     * configuration object is supplied, applies it through setConfig().
     *
     * @param string              $oldText Markup of the original list.
     * @param string              $newText Markup of the revised list.
     * @param HtmlDiffConfig|null $config  Optional configuration; nothing is applied when null.
     *
     * @return ListDiffLines
     */
    public static function create($oldText, $newText, ?HtmlDiffConfig $config = null)
    {
        $diff = new self($oldText, $newText);

        // The nullable type is spelled out explicitly: relying on "= null" to make
        // the parameter nullable is deprecated as of PHP 8.4.
        if (null !== $config) {
            $diff->setConfig($config);
        }

        return $diff;
    }
61
62 7
    /**
     * {@inheritDoc}
     *
     * Returns the cached diff when the diff cache already holds one for this
     * old/new pair; otherwise sets up the LCS service and diffs the two lists
     * item by item. In both cases the result is stored in $this->content.
     */
    public function build()
    {
        $this->prepare();

        if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) {
            $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);

            return $this->content;
        }

        $this->lcsService = new LcsService(
            new ListItemMatchStrategy($this->stringUtil, $this->config->getMatchThreshold())
        );

        // Mirror the cache-hit branch so $this->content always reflects the last build.
        // NOTE(review): the fresh result is not written back to the diff cache; the
        // cache's write API is not visible from this class - confirm and add if intended.
        $this->content = $this->listByLines($this->oldText, $this->newText);

        return $this->content;
    }
81
82
    /**
     * Parses both inputs as HTML, locates the list in each document and
     * returns the rendered item-by-item diff of the two lists.
     *
     * @param string $old Markup containing the original list.
     * @param string $new Markup containing the revised list.
     *
     * @return string Markup of the diffed list, as produced by processOperations().
     *
     * @throws LogicException When a document has no list node or the located node is not an element.
     */
    protected function listByLines(string $old, string $new) : string
    {
        $newDom = new DOMDocument();
        $newDom->loadHTML($new);

        $oldDom = new DOMDocument();
        $oldDom->loadHTML($old);

        $newListNode = $this->findListNode($newDom);
        $oldListNode = $this->findListNode($oldDom);

        // findListNode() only promises a DOMNode (and can be overridden), while the
        // helpers below require DOMElement: verify instead of assuming the subtype.
        if (!($oldListNode instanceof DOMElement) || !($newListNode instanceof DOMElement)) {
            throw new LogicException('Unable to diff list; list node is not an element');
        }

        $operations = $this->getListItemOperations($oldListNode, $newListNode);

        return $this->processOperations($operations, $oldListNode, $newListNode);
    }
97
98
    /**
     * Returns the first list element of the document, in document order.
     *
     * Every tag in LIST_TAG_NAMES is considered at once. Looking the tags up
     * one after another would prefer any "ul" in the document over an "ol" or
     * "dl" that appears earlier, including one that encloses it.
     *
     * @param DOMDocument $dom Parsed document to search.
     *
     * @return DOMNode The first matching list element.
     *
     * @throws LogicException When the document contains no list element.
     */
    protected function findListNode(DOMDocument $dom) : DOMNode
    {
        $tagTests = array_map(
            static function (string $tagName) : string {
                return 'self::' . $tagName;
            },
            self::LIST_TAG_NAMES
        );

        // A single location path ("//*[self::ul or self::ol or self::dl]") yields
        // its matches in document order, so item(0) is the outermost/earliest list.
        $xPath     = new \DOMXPath($dom);
        $listNodes = $xPath->query('//*[' . implode(' or ', $tagTests) . ']');

        if ($listNodes !== false && $listNodes->length > 0) {
            return $listNodes->item(0);
        }

        throw new LogicException('Unable to diff list; missing list node');
    }
110
111 7
    /**
     * Turns the LCS matches between the items of both lists into the
     * CHANGED / ADDED / DELETED operations that cover the unmatched gaps.
     *
     * Line numbers are 1-based; a match value of 0 means the old item has no
     * counterpart in the new list.
     *
     * @return Operation[]
     */
    protected function getListItemOperations(DOMElement $oldListNode, DOMElement $newListNode) : array
    {
        // Text of every list item, used as input for the LCS algorithm
        $oldTexts = $this->getListTextArray($oldListNode);
        $newTexts = $this->getListTextArray($newListNode);

        $matches = $this->lcsService->longestCommonSubsequence($oldTexts, $newTexts);

        // Sentinel match one past the end of both lists, so that a gap after the
        // last real match is reported as well
        $matches[count($oldTexts) + 1] = count($newTexts) + 1;

        $result  = [];
        $prevOld = 0;
        $prevNew = 0;

        foreach ($matches as $oldLine => $newLine) {
            // Old item without a matching line in the new list
            if ($newLine === 0) {
                continue;
            }

            $firstOld = $prevOld + 1;
            $firstNew = $prevNew + 1;

            $newGap = $newLine > $firstNew;
            $oldGap = $oldLine > $firstOld;

            if ($newGap && $oldGap) {
                // Unmatched items on both sides: change
                $result[] = new Operation(Operation::CHANGED, $firstOld, $oldLine - 1, $firstNew, $newLine - 1);
            } elseif ($newGap && $oldLine === $firstOld) {
                // Unmatched items only in the new list: added before this match
                $result[] = new Operation(Operation::ADDED, $prevOld, $prevOld, $firstNew, $newLine - 1);
            } elseif ($newLine === $firstNew && $oldGap) {
                // Unmatched items only in the old list: deleted before this match
                $result[] = new Operation(Operation::DELETED, $firstOld, $oldLine - 1, $prevNew, $prevNew);
            }

            $prevNew = $newLine;
            $prevOld = $oldLine;
        }

        return $result;
    }
173
174
    /**
     * Collects the matching text of each child of the list, skipping the
     * text nodes that sit between the items.
     *
     * @return string[]
     */
    protected function getListTextArray(DOMElement $listNode) : array
    {
        $texts = [];

        foreach ($listNode->childNodes as $child) {
            if (!($child instanceof DOMText)) {
                $texts[] = $this->getRelevantNodeText($child);
            }
        }

        return $texts;
    }
191 7
192
    /**
     * Builds the text used to match a list item against other items.
     *
     * A node without children contributes its text content. Otherwise each
     * child is visited: childless children are added as their outer markup,
     * children whose tag is in $listContentTags are added recursively wrapped
     * in a bare tag pair, and any other child with children is left out.
     */
    protected function getRelevantNodeText(DOMNode $node) : string
    {
        if (!$node->hasChildNodes()) {
            return $node->textContent;
        }

        $text = '';

        /** @var DOMElement $child */
        foreach ($node->childNodes as $child) {
            if (!$child->hasChildNodes()) {
                $text .= $this->getOuterText($child);
            } elseif (in_array($child->tagName, static::$listContentTags, true)) {
                $tag   = $child->tagName;
                $text .= '<' . $tag . '>' . $this->getRelevantNodeText($child) . '</' . $tag . '>';
            }
        }

        return $text;
    }
219
220 3
    /**
     * Marks a list item as removed: wraps its content in a "del" element,
     * appends the deleted class and returns the item's outer markup.
     */
    protected function deleteListItem(DOMElement $li) : string
    {
        $this->wrapNode($li, 'del');
        $this->appendClassToNode($li, self::CLASS_LIST_ITEM_DELETED);

        $markup = $this->getOuterText($li);

        return $markup;
    }
228
229 4
    /**
     * Marks a list item as inserted: wraps its content in an "ins" element,
     * appends the changed class when $replacement is true (the added class
     * otherwise) and returns the item's outer markup.
     */
    protected function addListItem(DOMElement $li, bool $replacement = false) : string
    {
        $class = self::CLASS_LIST_ITEM_ADDED;

        if ($replacement) {
            $class = self::CLASS_LIST_ITEM_CHANGED;
        }

        $this->wrapNode($li, 'ins');
        $this->appendClassToNode($li, $class);

        return $this->getOuterText($li);
    }
240
241
    /**
     * Renders the diffed list.
     *
     * Walks the operations in order. Items that lie before an operation are
     * carried over from the old list, with their content diffed against the
     * item at the current position in the new list when one is available.
     * The operation's own items are then emitted as deleted and/or added.
     * Items left after the last operation are carried over the same way. The
     * result replaces the content of the new list node, which is returned as
     * markup with the "diff-list" class appended.
     *
     * @param Operation[] $operations
     */
    protected function processOperations(array $operations, DOMElement $oldListNode, DOMElement $newListNode) : string
    {
        $html           = '';
        $oldIndex       = 0;
        $newIndex       = 0;
        $previousAction = null;

        foreach ($operations as $operation) {
            $markedReplaced = false;

            // How far the operation's start must lie ahead of the current index for
            // another carried-over item to fit in before it
            $oldLead = $operation->action === Operation::ADDED ? 0 : 1;
            $newLead = $operation->action === Operation::DELETED ? 0 : 1;

            while ($operation->startInOld > $oldIndex + $oldLead) {
                $li = $this->getChildNodeByIndex($oldListNode, $oldIndex);

                if ($operation->startInNew > $newIndex + $newLead) {
                    $counterpart = $this->getChildNodeByIndex($newListNode, $newIndex);

                    $innerDiff = HtmlDiff::create(
                        $this->getInnerHtml($li),
                        $this->getInnerHtml($counterpart),
                        $this->config
                    );

                    $this->setInnerHtml($li, $innerDiff->build());

                    $newIndex++;
                }

                // Only the first carried-over item after a deletion is flagged as changed
                if ($previousAction === Operation::DELETED && !$markedReplaced) {
                    $class          = self::CLASS_LIST_ITEM_CHANGED;
                    $markedReplaced = true;
                } else {
                    $class = self::CLASS_LIST_ITEM_NONE;
                }

                $this->appendClassToNode($li, $class);

                $html .= $this->getOuterText($li);
                $oldIndex++;
            }

            switch ($operation->action) {
                case Operation::ADDED:
                    for ($line = $operation->startInNew; $line <= $operation->endInNew; $line++) {
                        $added = $this->getChildNodeByIndex($newListNode, $line - 1);
                        $html .= $this->addListItem($added);
                    }
                    $newIndex = $operation->endInNew;
                    break;

                case Operation::DELETED:
                    for ($line = $operation->startInOld; $line <= $operation->endInOld; $line++) {
                        $removed = $this->getChildNodeByIndex($oldListNode, $line - 1);
                        $html   .= $this->deleteListItem($removed);
                    }
                    $oldIndex = $operation->endInOld;
                    break;

                case Operation::CHANGED:
                    // Number of deleted items not yet paired with an added one
                    $unpaired = 0;
                    for ($line = $operation->startInOld; $line <= $operation->endInOld; $line++) {
                        $removed = $this->getChildNodeByIndex($oldListNode, $line - 1);
                        $html   .= $this->deleteListItem($removed);
                        $unpaired++;
                    }
                    for ($line = $operation->startInNew; $line <= $operation->endInNew; $line++) {
                        $added = $this->getChildNodeByIndex($newListNode, $line - 1);
                        $html .= $this->addListItem($added, $unpaired > 0);
                        $unpaired--;
                    }
                    $oldIndex = $operation->endInOld;
                    $newIndex = $operation->endInNew;
                    break;
            }

            $previousAction = $operation->action;
        }

        $oldCount = $this->childCountWithoutTextNode($oldListNode);
        $newCount = $this->childCountWithoutTextNode($newListNode);

        // Items remaining after the last operation
        while ($oldIndex < $oldCount) {
            $li = $this->getChildNodeByIndex($oldListNode, $oldIndex);

            if ($newIndex < $newCount) {
                $counterpart = $this->getChildNodeByIndex($newListNode, $newIndex);

                $innerDiff = HtmlDiff::create(
                    $this->getInnerHtml($li),
                    $this->getInnerHtml($counterpart),
                    $this->config
                );

                $this->setInnerHtml($li, $innerDiff->build());

                $newIndex++;
            }

            if ($previousAction === Operation::DELETED) {
                $class = self::CLASS_LIST_ITEM_CHANGED;
            } else {
                $class = self::CLASS_LIST_ITEM_NONE;
            }

            $this->appendClassToNode($li, $class);

            $html .= $this->getOuterText($li);
            $oldIndex++;
        }

        $this->setInnerHtml($newListNode, $html);
        $this->appendClassToNode($newListNode, 'diff-list');

        return $newListNode->ownerDocument->saveHTML($newListNode);
    }
369
370
    /**
     * Adds $class to the end of the element's "class" attribute, keeping
     * whatever the attribute already contained.
     */
    protected function appendClassToNode(DOMElement $node, string $class)
    {
        $combined = $node->getAttribute('class') . ' ' . $class;

        // trim() drops the leading space left behind when the attribute was empty
        $node->setAttribute('class', trim($combined));
    }
377
378
    /**
     * Serialises the node itself (not only its children) through its owner document.
     */
    private function getOuterText(DOMNode $node) : string
    {
        $document = $node->ownerDocument;

        return $document->saveHTML($node);
    }
382
383
    /**
     * Serialises the children of the node by copying them into a scratch
     * document and returning that document's trimmed markup.
     */
    private function getInnerHtml(DOMNode $node) : string
    {
        $buffer = new DOMDocument();

        foreach ($node->childNodes as $child) {
            $copy = $buffer->importNode($child, true);
            $buffer->appendChild($copy);
        }

        return trim($buffer->saveHTML());
    }
394
395
    /**
     * Replaces the content of the node with the given markup.
     *
     * The markup is parsed inside a "body" wrapper in a scratch document and
     * the resulting body children are imported into the node's own document.
     */
    private function setInnerHtml(DOMNode $node, string $html) : void
    {
        // Empty the node before the new children are attached
        $node->nodeValue = '';

        $buffer = new DOMDocument();
        $buffer->loadHTML('<body>' . $html . '</body>');

        $body = $buffer->getElementsByTagName('body')->item(0);

        foreach ($body->childNodes as $child) {
            $imported = $node->ownerDocument->importNode($child, true);
            $node->appendChild($imported);
        }
    }
410
411
    /**
     * Wraps the current content of the node in a single element named $tagName.
     */
    private function wrapNode(DOMNode $node, string $tagName) : void
    {
        $inner = $this->getInnerHtml($node);

        $this->setInnerHtml($node, '<' . $tagName . '>' . $inner . '</' . $tagName . '>');
    }
418
419
    /**
     * Counts the direct children of the node that are not text nodes.
     */
    private function childCountWithoutTextNode(DOMNode $node) : int
    {
        $count = 0;

        foreach ($node->childNodes as $child) {
            if (!($child instanceof DOMText)) {
                $count++;
            }
        }

        return $count;
    }
433
434
    /**
     * Returns the child at position $index (0-based) among the children of
     * $node that are not text nodes.
     *
     * The filtered child list is cached per node in a function-level static.
     * Each cache entry keeps a reference to the node it was built for,
     * because spl_object_hash() values are reused once an object has been
     * destroyed: without that reference a later, unrelated node could pick up
     * the children cached for an earlier one.
     *
     * NOTE(review): the cache is never pruned, so entries live for the whole
     * process - consider moving it to per-instance state.
     *
     * @param DOMNode $node  Parent whose children are indexed.
     * @param int     $index 0-based position among the non-text children.
     *
     * @return DOMElement
     *
     * @throws LogicException When the node has no non-text child at $index.
     */
    private function getChildNodeByIndex(DOMNode $node, int $index) : DOMElement
    {
        static $listCache = [];

        $nodeHash = spl_object_hash($node);

        if (!isset($listCache[$nodeHash]) || $listCache[$nodeHash]['node'] !== $node) {
            $children = [];

            foreach ($node->childNodes as $childNode) {
                if (!($childNode instanceof DOMText)) {
                    $children[] = $childNode;
                }
            }

            // Holding on to $node keeps its hash from being handed to another object
            $listCache[$nodeHash] = ['node' => $node, 'children' => $children];
        }

        if (!isset($listCache[$nodeHash]['children'][$index])) {
            throw new LogicException(sprintf('Unable to diff list; no list item at index %d', $index));
        }

        return $listCache[$nodeHash]['children'][$index];
    }
454
}
455