Passed
Pull Request — master (#96)
by Sven
03:02
created

ListDiffLines::getRelevantNodeText()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 27

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 5.009

Importance

Changes 0
Metric Value
cc 5
nc 5
nop 1
dl 0
loc 27
ccs 13
cts 14
cp 0.9286
crap 5.009
rs 9.1768
c 0
b 0
f 0
1
<?php
2
3
namespace Caxy\HtmlDiff;
4
5
use Caxy\HtmlDiff\Strategy\ListItemMatchStrategy;
6
use DOMDocument;
7
use DOMDocumentFragment;
8
use DOMElement;
9
use DOMNode;
10
use DOMNodeList;
11
use DOMText;
12
use DOMXPath;
13
use KubAT\PhpSimple\HtmlDomParser;
14
use LogicException;
15
16
class ListDiffLines extends AbstractDiff
17
{
18
    private const CLASS_LIST_ITEM_ADDED   = 'normal new';
19
    private const CLASS_LIST_ITEM_DELETED = 'removed';
20
    private const CLASS_LIST_ITEM_CHANGED = 'replacement';
21
    private const CLASS_LIST_ITEM_NONE    = 'normal';
22
23
    protected const LIST_TAG_NAMES = ['ul', 'ol', 'dl'];
24
25
    /**
26
     * List of tags that should be included when retrieving
27
     * text from a single list item that will be used in
28
     * matching logic (and only in matching logic).
29
     *
30
     * @see getRelevantNodeText()
31
     *
32
     * @var array
33
     */
34
    protected static $listContentTags = [
35
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'br', 'hr', 'code',
36
        'input', 'form', 'img', 'span', 'a', 'i', 'b', 'strong', 'em',
37
        'font', 'big', 'del', 'tt', 'sub', 'sup', 'strike',
38
    ];
39
40
    /**
41
     * @var LcsService
42
     */
43
    protected $lcsService;
44
45
    /**
     * Named constructor: builds a list differ for a pair of HTML fragments.
     *
     * The configuration is only applied when one is passed in; with null,
     * setConfig() is not called at all.
     *
     * @param string              $oldText
     * @param string              $newText
     * @param HtmlDiffConfig|null $config
     *
     * @return ListDiffLines
     */
    public static function create($oldText, $newText, ?HtmlDiffConfig $config = null)
    {
        // The nullable type is spelled out explicitly: relying on the
        // "= null" default to make the type nullable is deprecated in PHP 8.4.
        $diff = new self($oldText, $newText);

        if (null !== $config) {
            $diff->setConfig($config);
        }

        return $diff;
    }
62
63
    /**
     * {@inheritDoc}
     *
     * Returns the cached diff when the diff cache already holds an entry for
     * the old/new text pair; otherwise sets up the LCS service with the list
     * item match strategy and computes the diff via listByLines().
     */
    public function build()
    {
        $this->prepare();

        if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) {
            $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);

            return $this->content;
        }

        $this->lcsService = new LcsService(
            new ListItemMatchStrategy($this->stringUtil, $this->config->getMatchThreshold())
        );

        // Store the result on the instance on a cache miss as well, so that
        // $this->content is populated no matter which path produced the diff.
        // NOTE(review): the freshly built diff is not written back to the diff
        // cache here; the cache's write API is not visible from this file.
        $this->content = $this->listByLines($this->oldText, $this->newText);

        return $this->content;
    }
82
83 8
    /**
     * Diffs two HTML fragments that each contain a list.
     *
     * Both fragments are parsed into their own DOMDocument, the list node of
     * each is located, the item-level operations are computed and then
     * rendered into the new list node.
     *
     * @param string $old HTML containing the original list
     * @param string $new HTML containing the revised list
     *
     * @return string serialized HTML of the new list node after the diff was applied
     *
     * @throws LogicException when a fragment has no list node, or the node found is not an element
     */
    protected function listByLines(string $old, string $new) : string
    {
        // NOTE(review): the fragments are handed to loadHTML() as-is, without
        // any charset hint — confirm non-ASCII input survives the round trip.
        $newDom = new DOMDocument();
        $newDom->loadHTML($new);

        $oldDom = new DOMDocument();
        $oldDom->loadHTML($old);

        $newListNode = $this->findListNode($newDom);
        $oldListNode = $this->findListNode($oldDom);

        // findListNode() is declared to return DOMNode, while the methods
        // called below require DOMElement; make that narrowing explicit
        // instead of assuming it.
        if (!($oldListNode instanceof DOMElement) || !($newListNode instanceof DOMElement)) {
            throw new LogicException('Unable to diff list; list node is not an element');
        }

        $operations = $this->getListItemOperations($oldListNode, $newListNode);

        return $this->processOperations($operations, $oldListNode, $newListNode);
    }
98
99 8
    /**
     * Finds the list node to diff inside a parsed document.
     *
     * An XPath union over every tag name in LIST_TAG_NAMES is evaluated and
     * the first entry of the result is returned.
     *
     * @throws LogicException when the document contains none of the list tags
     */
    protected function findListNode(DOMDocument $dom) : DOMNode
    {
        $selectors = [];

        foreach (self::LIST_TAG_NAMES as $tagName) {
            $selectors[] = '//' . $tagName;
        }

        $matches = (new DOMXPath($dom))->query(implode('|', $selectors));

        if ($matches->length <= 0) {
            throw new LogicException('Unable to diff list; missing list node');
        }

        return $matches->item(0);
    }
111
112
    /**
     * Turns the LCS matches between the two lists into a sequence of
     * CHANGED / ADDED / DELETED operations.
     *
     * Line numbers used in the operations are 1-based positions in the
     * arrays returned by getListTextArray().
     *
     * @return Operation[]
     */
    protected function getListItemOperations(DOMElement $oldListNode, DOMElement $newListNode) : array
    {
        // Text of every list item, used as input for the LCS algorithm
        $oldItems = $this->getListTextArray($oldListNode);
        $newItems = $this->getListTextArray($newListNode);

        $matches = $this->lcsService->longestCommonSubsequence($oldItems, $newItems);

        // Sentinel match one past the end of both lists, so that unmatched
        // items at the tail are still emitted as an operation.
        $matches[count($oldItems) + 1] = count($newItems) + 1;

        $result  = [];
        $lastOld = 0;
        $lastNew = 0;

        foreach ($matches as $oldLine => $newLine) {
            // Old line without a counterpart in the new list
            if ($newLine === 0) {
                continue;
            }

            $expectedOld = $lastOld + 1;
            $expectedNew = $lastNew + 1;

            $skippedInOld = $oldLine > $expectedOld;
            $skippedInNew = $newLine > $expectedNew;

            if ($skippedInNew && $skippedInOld) {
                // Unmatched items on both sides: change
                $result[] = new Operation(
                    Operation::CHANGED,
                    $expectedOld,
                    $oldLine - 1,
                    $expectedNew,
                    $newLine - 1
                );
            } elseif ($skippedInNew && $oldLine === $expectedOld) {
                // Unmatched items only in the new list: additions before this match
                $result[] = new Operation(
                    Operation::ADDED,
                    $lastOld,
                    $lastOld,
                    $expectedNew,
                    $newLine - 1
                );
            } elseif ($skippedInOld && $newLine === $expectedNew) {
                // Unmatched items only in the old list: deletions before this match
                $result[] = new Operation(
                    Operation::DELETED,
                    $expectedOld,
                    $oldLine - 1,
                    $lastNew,
                    $lastNew
                );
            }

            $lastNew = $newLine;
            $lastOld = $oldLine;
        }

        return $result;
    }
174
175
    /**
     * Collects the matching text of every direct child of a list node.
     *
     * Text nodes between the items are ignored; every other child is passed
     * through getRelevantNodeText().
     *
     * @return string[]
     */
    protected function getListTextArray(DOMElement $listNode) : array
    {
        $texts = [];

        foreach ($listNode->childNodes as $child) {
            if (!($child instanceof DOMText)) {
                $texts[] = $this->getRelevantNodeText($child);
            }
        }

        return $texts;
    }
192
193 8
    /**
     * Builds the string representation of a node that is used for matching.
     *
     * A node without children yields its text content. Otherwise every child
     * is visited: childless children contribute their serialized HTML, while
     * children that have children of their own contribute only if their tag
     * is listed in $listContentTags — rendered as a bare open/close tag pair
     * around their own (recursively computed) relevant text. Children with
     * other tags are left out.
     */
    protected function getRelevantNodeText(DOMNode $node) : string
    {
        if (!$node->hasChildNodes()) {
            return $node->textContent;
        }

        $pieces = [];

        /** @var DOMElement $child */
        foreach ($node->childNodes as $child) {
            if (!$child->hasChildNodes()) {
                $pieces[] = $this->getOuterText($child);

                continue;
            }

            $tag = $child->tagName;

            if (in_array($tag, static::$listContentTags, true)) {
                $pieces[] = '<' . $tag . '>' . $this->getRelevantNodeText($child) . '</' . $tag . '>';
            }
        }

        return implode('', $pieces);
    }
220
221 4
    /**
     * Marks a list item as deleted and returns its serialized HTML.
     *
     * The item's content is wrapped in a <del> element and the "removed"
     * class is appended to the item itself.
     */
    protected function deleteListItem(DOMElement $li) : string
    {
        $this->wrapNode($li, 'del');
        $this->appendClassToNode($li, self::CLASS_LIST_ITEM_DELETED);

        $markup = $this->getOuterText($li);

        return $markup;
    }
229
230 5
    /**
     * Marks a list item as added and returns its serialized HTML.
     *
     * The item's content is wrapped in an <ins> element. The class appended
     * to the item is the "changed" class when $replacement is true and the
     * "added" class otherwise.
     */
    protected function addListItem(DOMElement $li, bool $replacement = false) : string
    {
        $this->wrapNode($li, 'ins');

        if ($replacement === true) {
            $class = self::CLASS_LIST_ITEM_CHANGED;
        } else {
            $class = self::CLASS_LIST_ITEM_ADDED;
        }

        $this->appendClassToNode($li, $class);

        return $this->getOuterText($li);
    }
241
242
    /**
     * Renders the diffed list by walking the operations in order.
     *
     * Items of the old list that lie before an operation are emitted as
     * "kept" items (diffed against the item at the current position of the
     * new list when one is available), then the operation itself is
     * rendered. Old items remaining after the last operation are emitted the
     * same way. The collected markup replaces the content of the new list
     * node, which also receives the "diff-list" class.
     *
     * @param Operation[] $operations
     */
    protected function processOperations(array $operations, DOMElement $oldListNode, DOMElement $newListNode) : string
    {
        $html           = '';
        $oldCursor      = 0;
        $newCursor      = 0;
        $previousAction = null;

        foreach ($operations as $operation) {
            // Only the first kept item after a deletion gets the "changed" class
            $flagged = false;

            // Additions start right after the cursor, everything else one item further
            $oldOffset = $operation->action === Operation::ADDED ? 0 : 1;
            $newOffset = $operation->action === Operation::DELETED ? 0 : 1;

            while ($operation->startInOld > $oldCursor + $oldOffset) {
                $oldItem = $this->getChildNodeByIndex($oldListNode, $oldCursor);
                $newItem = null;

                if ($operation->startInNew > $newCursor + $newOffset) {
                    $newItem = $this->getChildNodeByIndex($newListNode, $newCursor);
                    $newCursor++;
                }

                $class = self::CLASS_LIST_ITEM_NONE;

                if ($previousAction === Operation::DELETED && !$flagged) {
                    $class   = self::CLASS_LIST_ITEM_CHANGED;
                    $flagged = true;
                }

                $html .= $this->renderKeptListItem($oldItem, $newItem, $class);
                $oldCursor++;
            }

            switch ($operation->action) {
                case Operation::ADDED:
                    for ($line = $operation->startInNew; $line <= $operation->endInNew; $line++) {
                        $html .= $this->addListItem(
                            $this->getChildNodeByIndex($newListNode, $line - 1)
                        );
                    }
                    $newCursor = $operation->endInNew;
                    break;

                case Operation::DELETED:
                    for ($line = $operation->startInOld; $line <= $operation->endInOld; $line++) {
                        $html .= $this->deleteListItem(
                            $this->getChildNodeByIndex($oldListNode, $line - 1)
                        );
                    }
                    $oldCursor = $operation->endInOld;
                    break;

                case Operation::CHANGED:
                    $removed = 0;
                    for ($line = $operation->startInOld; $line <= $operation->endInOld; $line++) {
                        $html .= $this->deleteListItem(
                            $this->getChildNodeByIndex($oldListNode, $line - 1)
                        );
                        $removed++;
                    }

                    // As many inserted items as were removed count as replacements
                    $inserted = 0;
                    for ($line = $operation->startInNew; $line <= $operation->endInNew; $line++) {
                        $html .= $this->addListItem(
                            $this->getChildNodeByIndex($newListNode, $line - 1),
                            $inserted < $removed
                        );
                        $inserted++;
                    }

                    $oldCursor = $operation->endInOld;
                    $newCursor = $operation->endInNew;
                    break;
            }

            $previousAction = $operation->action;
        }

        $oldTotal = $this->childCountWithoutTextNode($oldListNode);
        $newTotal = $this->childCountWithoutTextNode($newListNode);

        // Every remaining item gets the "changed" class when the last operation was a deletion
        $tailClass = $previousAction === Operation::DELETED
            ? self::CLASS_LIST_ITEM_CHANGED
            : self::CLASS_LIST_ITEM_NONE;

        while ($oldCursor < $oldTotal) {
            $oldItem = $this->getChildNodeByIndex($oldListNode, $oldCursor);
            $newItem = null;

            if ($newCursor < $newTotal) {
                $newItem = $this->getChildNodeByIndex($newListNode, $newCursor);
                $newCursor++;
            }

            $html .= $this->renderKeptListItem($oldItem, $newItem, $tailClass);
            $oldCursor++;
        }

        $this->setInnerHtml($newListNode, $html);
        $this->appendClassToNode($newListNode, 'diff-list');

        return $this->getOuterText($newListNode);
    }

    /**
     * Renders an old list item that is kept in the output.
     *
     * When a counterpart from the new list is given, the item's content is
     * replaced by the HtmlDiff of both items' inner HTML. The class is then
     * appended and the item's serialized HTML returned.
     */
    private function renderKeptListItem(DOMElement $oldItem, ?DOMElement $newItem, string $class) : string
    {
        if (null !== $newItem) {
            $htmlDiff = HtmlDiff::create(
                $this->getInnerHtml($oldItem),
                $this->getInnerHtml($newItem),
                $this->config
            );

            $this->setInnerHtml($oldItem, $htmlDiff->build());
        }

        $this->appendClassToNode($oldItem, $class);

        return $this->getOuterText($oldItem);
    }
370
371 8
    /**
     * Appends a class name to the element's "class" attribute.
     *
     * The new class is joined to the current attribute value with a space
     * and the result is trimmed before being written back.
     */
    protected function appendClassToNode(DOMElement $node, string $class)
    {
        $current = $node->getAttribute('class');

        $node->setAttribute('class', trim($current . ' ' . $class));
    }
378
379 8
    /**
     * Serializes a node, including the node itself, through its owner document.
     */
    private function getOuterText(DOMNode $node) : string
    {
        $document = $node->ownerDocument;

        return $document->saveHTML($node);
    }
383
384 8
    /**
     * Serializes the children of a node, without the node itself.
     *
     * Deep copies of all children are imported into a scratch document,
     * which is then serialized and trimmed.
     */
    private function getInnerHtml(DOMNode $node) : string
    {
        $scratch = new DOMDocument();

        foreach ($node->childNodes as $child) {
            $copy = $scratch->importNode($child, true);
            $scratch->appendChild($copy);
        }

        $serialized = $scratch->saveHTML();

        return trim($serialized);
    }
395
396 8
    /**
     * Replaces the content of a node with the given HTML.
     *
     * The node is emptied, the HTML is parsed inside a <body> wrapper in a
     * scratch document, and deep copies of the parsed body's children are
     * appended to the node.
     */
    private function setInnerHtml(DOMNode $node, string $html) : void
    {
        $node->nodeValue = '';

        $scratch = new DOMDocument();
        $scratch->loadHTML('<body>' . $html . '</body>');

        $body = $scratch->getElementsByTagName('body')->item(0);

        foreach ($body->childNodes as $parsedChild) {
            $imported = $node->ownerDocument->importNode($parsedChild, true);
            $node->appendChild($imported);
        }
    }
411
412 6
    /**
     * Wraps the current content of a node in a new element with the given tag name.
     */
    private function wrapNode(DOMNode $node, string $tagName) : void
    {
        $inner   = $this->getInnerHtml($node);
        $wrapped = '<' . $tagName . '>' . $inner . '</' . $tagName . '>';

        $this->setInnerHtml($node, $wrapped);
    }
419
420 8
    /**
     * Counts the direct children of a node that are not text nodes.
     */
    private function childCountWithoutTextNode(DOMNode $node) : int
    {
        $total = 0;

        foreach ($node->childNodes as $child) {
            if (!($child instanceof DOMText)) {
                $total++;
            }
        }

        return $total;
    }
434
435 8
    /**
     * Returns the child at the given zero-based position, counting only
     * children that are not text nodes.
     *
     * The lookup is deliberately uncached. A function-static cache keyed by
     * spl_object_hash() is unsafe here: the hash of a destroyed object may be
     * handed out again, so a later diff in the same process could be served
     * the children of an earlier document. Such a cache would also keep every
     * cached node alive for the lifetime of the process.
     *
     * @param DOMNode $node  parent whose children are inspected
     * @param int     $index zero-based position among the non-text children
     *
     * @throws LogicException when there is no non-text child at that position
     */
    private function getChildNodeByIndex(DOMNode $node, int $index) : DOMElement
    {
        $position = 0;

        foreach ($node->childNodes as $childNode) {
            if ($childNode instanceof DOMText) {
                continue;
            }

            if ($position === $index) {
                return $childNode;
            }

            $position++;
        }

        throw new LogicException(
            sprintf('Unable to diff list; no list item at index %d', $index)
        );
    }
455
}
456