1 | <?php |
||
2 | |||
3 | namespace Caxy\HtmlDiff; |
||
4 | |||
5 | use Caxy\HtmlDiff\Strategy\ListItemMatchStrategy; |
||
6 | use DOMDocument; |
||
7 | use DOMElement; |
||
8 | use DOMNode; |
||
9 | use DOMText; |
||
10 | use DOMXPath; |
||
11 | use LogicException; |
||
12 | |||
13 | class ListDiffLines extends AbstractDiff |
||
14 | { |
||
15 | private const CLASS_LIST_ITEM_ADDED = 'normal new'; |
||
16 | private const CLASS_LIST_ITEM_DELETED = 'removed'; |
||
17 | private const CLASS_LIST_ITEM_CHANGED = 'replacement'; |
||
18 | private const CLASS_LIST_ITEM_NONE = 'normal'; |
||
19 | |||
20 | protected const LIST_TAG_NAMES = ['ul', 'ol', 'dl']; |
||
21 | |||
22 | /** |
||
23 | * List of tags that should be included when retrieving |
||
24 | * text from a single list item that will be used in |
||
25 | * matching logic (and only in matching logic). |
||
26 | * |
||
27 | * @see getRelevantNodeText() |
||
28 | * |
||
29 | * @var array |
||
30 | */ |
||
31 | protected static $listContentTags = [ |
||
32 | 'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'br', 'hr', 'code', |
||
33 | 'input', 'form', 'img', 'span', 'a', 'i', 'b', 'strong', 'em', |
||
34 | 'font', 'big', 'del', 'tt', 'sub', 'sup', 'strike', |
||
35 | ]; |
||
36 | |||
37 | /** |
||
38 | * @var LcsService |
||
39 | */ |
||
40 | protected $lcsService; |
||
41 | |||
42 | /** |
||
43 | * @var array<string, DOMElement> |
||
44 | */ |
||
45 | private $nodeCache = []; |
||
46 | |||
47 | /** |
||
48 | * @param string $oldText |
||
49 | * @param string $newText |
||
50 | * @param HtmlDiffConfig|null $config |
||
51 | * |
||
52 | * @return ListDiffLines |
||
53 | */ |
||
54 | 8 | public static function create($oldText, $newText, HtmlDiffConfig $config = null) |
|
55 | { |
||
56 | 8 | $diff = new self($oldText, $newText); |
|
57 | |||
58 | 8 | if (null !== $config) { |
|
59 | 8 | $diff->setConfig($config); |
|
60 | } |
||
61 | |||
62 | 8 | return $diff; |
|
63 | } |
||
64 | |||
65 | /** |
||
66 | * {@inheritDoc} |
||
67 | */ |
||
68 | 8 | public function build() |
|
69 | { |
||
70 | 8 | $this->prepare(); |
|
71 | |||
72 | 8 | if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) { |
|
73 | $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText); |
||
74 | |||
75 | return $this->content; |
||
76 | } |
||
77 | |||
78 | 8 | $this->lcsService = new LcsService( |
|
79 | 8 | new ListItemMatchStrategy($this->stringUtil, $this->config->getMatchThreshold()) |
|
80 | ); |
||
81 | |||
82 | 8 | return $this->listByLines($this->oldText, $this->newText); |
|
83 | } |
||
84 | |||
85 | 8 | protected function listByLines(string $old, string $new) : string |
|
86 | { |
||
87 | 8 | $new = mb_convert_encoding($new, 'HTML-ENTITIES', "UTF-8"); |
|
88 | 8 | $old = mb_convert_encoding($old, 'HTML-ENTITIES', "UTF-8"); |
|
89 | |||
90 | 8 | $newDom = new DOMDocument(); |
|
91 | 8 | $newDom->loadHTML($new); |
|
92 | |||
93 | 8 | $oldDom = new DOMDocument(); |
|
94 | 8 | $oldDom->loadHTML($old); |
|
95 | |||
96 | 8 | $newListNode = $this->findListNode($newDom); |
|
97 | 8 | $oldListNode = $this->findListNode($oldDom); |
|
98 | |||
99 | 8 | $operations = $this->getListItemOperations($oldListNode, $newListNode); |
|
100 | |||
101 | 8 | return $this->processOperations($operations, $oldListNode, $newListNode); |
|
102 | } |
||
103 | |||
104 | 8 | protected function findListNode(DOMDocument $dom) : DOMNode |
|
105 | { |
||
106 | 8 | $xPathQuery = '//' . implode('|//', self::LIST_TAG_NAMES); |
|
107 | 8 | $xPath = new DOMXPath($dom); |
|
108 | 8 | $listNodes = $xPath->query($xPathQuery); |
|
109 | |||
110 | 8 | if ($listNodes->length > 0) { |
|
111 | 8 | return $listNodes->item(0); |
|
112 | } |
||
113 | |||
114 | throw new LogicException('Unable to diff list; missing list node'); |
||
115 | } |
||
116 | |||
117 | /** |
||
118 | * @return Operation[] |
||
119 | */ |
||
120 | 8 | protected function getListItemOperations(DOMElement $oldListNode, DOMElement $newListNode) : array |
|
121 | { |
||
122 | // Prepare arrays of list item content to use in LCS algorithm |
||
123 | 8 | $oldListText = $this->getListTextArray($oldListNode); |
|
124 | 8 | $newListText = $this->getListTextArray($newListNode); |
|
125 | |||
126 | 8 | $lcsMatches = $this->lcsService->longestCommonSubsequence($oldListText, $newListText); |
|
127 | |||
128 | 8 | $oldLength = count($oldListText); |
|
129 | 8 | $newLength = count($newListText); |
|
130 | |||
131 | 8 | $operations = array(); |
|
132 | 8 | $currentLineInOld = 0; |
|
133 | 8 | $currentLineInNew = 0; |
|
134 | 8 | $lcsMatches[$oldLength + 1] = $newLength + 1; |
|
135 | 8 | foreach ($lcsMatches as $matchInOld => $matchInNew) { |
|
136 | // No matching line in new list |
||
137 | 8 | if ($matchInNew === 0) { |
|
138 | 8 | continue; |
|
139 | } |
||
140 | |||
141 | 8 | $nextLineInOld = $currentLineInOld + 1; |
|
142 | 8 | $nextLineInNew = $currentLineInNew + 1; |
|
143 | |||
144 | 8 | if ($matchInNew > $nextLineInNew && $matchInOld > $nextLineInOld) { |
|
145 | // Change |
||
146 | 1 | $operations[] = new Operation( |
|
147 | 1 | Operation::CHANGED, |
|
148 | $nextLineInOld, |
||
149 | 1 | $matchInOld - 1, |
|
150 | $nextLineInNew, |
||
151 | 1 | $matchInNew - 1 |
|
152 | ); |
||
153 | 8 | } elseif ($matchInNew > $nextLineInNew && $matchInOld === $nextLineInOld) { |
|
154 | // Add items before this |
||
155 | 3 | $operations[] = new Operation( |
|
156 | 3 | Operation::ADDED, |
|
157 | $currentLineInOld, |
||
158 | $currentLineInOld, |
||
159 | $nextLineInNew, |
||
160 | 3 | $matchInNew - 1 |
|
161 | ); |
||
162 | 8 | } elseif ($matchInNew === $nextLineInNew && $matchInOld > $nextLineInOld) { |
|
163 | // Delete items before this |
||
164 | 3 | $operations[] = new Operation( |
|
165 | 3 | Operation::DELETED, |
|
166 | $nextLineInOld, |
||
167 | 3 | $matchInOld - 1, |
|
168 | $currentLineInNew, |
||
169 | $currentLineInNew |
||
170 | ); |
||
171 | } |
||
172 | |||
173 | 8 | $currentLineInNew = $matchInNew; |
|
174 | 8 | $currentLineInOld = $matchInOld; |
|
175 | } |
||
176 | |||
177 | 8 | return $operations; |
|
178 | } |
||
179 | |||
180 | /** |
||
181 | * @return string[] |
||
182 | */ |
||
183 | 8 | protected function getListTextArray(DOMElement $listNode) : array |
|
184 | { |
||
185 | 8 | $output = []; |
|
186 | |||
187 | 8 | foreach ($listNode->childNodes as $listItem) { |
|
188 | 8 | if ($listItem instanceof DOMText) { |
|
189 | 8 | continue; |
|
190 | } |
||
191 | |||
192 | 8 | $output[] = $this->getRelevantNodeText($listItem); |
|
193 | } |
||
194 | |||
195 | 8 | return $output; |
|
196 | } |
||
197 | |||
198 | 8 | protected function getRelevantNodeText(DOMNode $node) : string |
|
199 | { |
||
200 | 8 | if ($node->hasChildNodes() === false) { |
|
201 | return $node->textContent; |
||
202 | } |
||
203 | |||
204 | 8 | $output = ''; |
|
205 | |||
206 | /** @var DOMElement $child */ |
||
207 | 8 | foreach ($node->childNodes as $child) { |
|
208 | 8 | if ($child->hasChildNodes() === false) { |
|
209 | 8 | $output .= $this->getOuterText($child); |
|
210 | |||
211 | 8 | continue; |
|
212 | } |
||
213 | |||
214 | 6 | if (in_array($child->tagName, static::$listContentTags, true) === true) { |
|
215 | 4 | $output .= sprintf( |
|
216 | 4 | '<%1$s>%2$s</%1$s>', |
|
217 | 4 | $child->tagName, |
|
218 | 4 | $this->getRelevantNodeText($child) |
|
219 | ); |
||
220 | } |
||
221 | } |
||
222 | |||
223 | 8 | return $output; |
|
224 | } |
||
225 | |||
226 | 4 | protected function deleteListItem(DOMElement $li) : string |
|
227 | { |
||
228 | 4 | $this->wrapNodeContent($li, 'del'); |
|
229 | |||
230 | 4 | $this->appendClassToNode($li, self::CLASS_LIST_ITEM_DELETED); |
|
231 | |||
232 | 4 | return $this->getOuterText($li); |
|
233 | } |
||
234 | |||
235 | 4 | protected function addListItem(DOMElement $li, bool $replacement = false) : string |
|
236 | { |
||
237 | 4 | $this->wrapNodeContent($li, 'ins'); |
|
238 | |||
239 | 4 | $this->appendClassToNode( |
|
240 | 4 | $li, |
|
241 | 4 | $replacement === true ? self::CLASS_LIST_ITEM_CHANGED : self::CLASS_LIST_ITEM_ADDED |
|
242 | ); |
||
243 | |||
244 | 4 | return $this->getOuterText($li); |
|
245 | } |
||
246 | |||
247 | /** |
||
248 | * @param Operation[] $operations |
||
249 | */ |
||
250 | 8 | protected function processOperations(array $operations, DOMElement $oldListNode, DOMElement $newListNode) : string |
|
251 | { |
||
252 | 8 | $output = ''; |
|
253 | |||
254 | 8 | $indexInOld = 0; |
|
255 | 8 | $indexInNew = 0; |
|
256 | 8 | $lastOperation = null; |
|
257 | |||
258 | 8 | foreach ($operations as $operation) { |
|
259 | 6 | $replaced = false; |
|
260 | 6 | while ($operation->startInOld > ($operation->action === Operation::ADDED ? $indexInOld : $indexInOld + 1)) { |
|
261 | 5 | $li = $this->getChildNodeByIndex($oldListNode, $indexInOld); |
|
262 | 5 | $matchingLi = null; |
|
263 | 5 | if ($operation->startInNew > ($operation->action === Operation::DELETED ? $indexInNew |
|
264 | 5 | : $indexInNew + 1) |
|
265 | ) { |
||
266 | 5 | $matchingLi = $this->getChildNodeByIndex($newListNode, $indexInNew); |
|
267 | } |
||
268 | |||
269 | 5 | if (null !== $matchingLi) { |
|
270 | 5 | $htmlDiff = HtmlDiff::create( |
|
271 | 5 | $this->getInnerHtml($li), |
|
272 | 5 | $this->getInnerHtml($matchingLi), |
|
273 | 5 | $this->config |
|
274 | ); |
||
275 | |||
276 | 5 | $this->setInnerHtml($li, $htmlDiff->build()); |
|
277 | |||
278 | 5 | $indexInNew++; |
|
279 | } |
||
280 | |||
281 | 5 | $class = self::CLASS_LIST_ITEM_NONE; |
|
282 | |||
283 | 5 | if ($lastOperation === Operation::DELETED && !$replaced) { |
|
284 | 2 | $class = self::CLASS_LIST_ITEM_CHANGED; |
|
285 | 2 | $replaced = true; |
|
286 | } |
||
287 | |||
288 | 5 | $this->appendClassToNode($li, $class); |
|
289 | |||
290 | 5 | $output .= $this->getOuterText($li); |
|
291 | 5 | $indexInOld++; |
|
292 | } |
||
293 | |||
294 | 6 | switch ($operation->action) { |
|
295 | case Operation::ADDED: |
||
296 | 3 | for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) { |
|
297 | 3 | $output .= $this->addListItem( |
|
298 | 3 | $this->getChildNodeByIndex($newListNode, $i - 1) |
|
299 | ); |
||
300 | } |
||
301 | 3 | $indexInNew = $operation->endInNew; |
|
302 | 3 | break; |
|
303 | |||
304 | case Operation::DELETED: |
||
305 | 3 | for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) { |
|
306 | 3 | $output .= $this->deleteListItem( |
|
307 | 3 | $this->getChildNodeByIndex($oldListNode, $i - 1) |
|
308 | ); |
||
309 | } |
||
310 | 3 | $indexInOld = $operation->endInOld; |
|
311 | 3 | break; |
|
312 | |||
313 | case Operation::CHANGED: |
||
314 | 1 | $changeDelta = 0; |
|
315 | 1 | for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) { |
|
316 | 1 | $output .= $this->deleteListItem( |
|
317 | 1 | $this->getChildNodeByIndex($oldListNode, $i - 1) |
|
318 | ); |
||
319 | 1 | $changeDelta--; |
|
320 | } |
||
321 | 1 | for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) { |
|
322 | 1 | $output .= $this->addListItem( |
|
323 | 1 | $this->getChildNodeByIndex($newListNode, $i - 1), |
|
324 | 1 | ($changeDelta < 0) |
|
325 | ); |
||
326 | 1 | $changeDelta++; |
|
327 | } |
||
328 | 1 | $indexInOld = $operation->endInOld; |
|
329 | 1 | $indexInNew = $operation->endInNew; |
|
330 | 1 | break; |
|
331 | } |
||
332 | |||
333 | 6 | $lastOperation = $operation->action; |
|
334 | } |
||
335 | |||
336 | 8 | $oldCount = $this->childCountWithoutTextNode($oldListNode); |
|
337 | 8 | $newCount = $this->childCountWithoutTextNode($newListNode); |
|
338 | |||
339 | 8 | while ($indexInOld < $oldCount) { |
|
340 | 5 | $li = $this->getChildNodeByIndex($oldListNode, $indexInOld); |
|
341 | 5 | $matchingLi = null; |
|
342 | 5 | if ($indexInNew < $newCount) { |
|
343 | 5 | $matchingLi = $this->getChildNodeByIndex($newListNode, $indexInNew); |
|
344 | } |
||
345 | |||
346 | 5 | if (null !== $matchingLi) { |
|
347 | 5 | $htmlDiff = HtmlDiff::create( |
|
348 | 5 | $this->getInnerHtml($li), |
|
349 | 5 | $this->getInnerHtml($matchingLi), |
|
350 | 5 | $this->config |
|
351 | ); |
||
352 | |||
353 | 5 | $this->setInnerHtml($li, $htmlDiff->build()); |
|
354 | |||
355 | 5 | $indexInNew++; |
|
356 | } |
||
357 | |||
358 | 5 | $class = self::CLASS_LIST_ITEM_NONE; |
|
359 | |||
360 | 5 | if ($lastOperation === Operation::DELETED) { |
|
361 | $class = self::CLASS_LIST_ITEM_CHANGED; |
||
362 | } |
||
363 | |||
364 | 5 | $this->appendClassToNode($li, $class); |
|
365 | |||
366 | 5 | $output .= $this->getOuterText($li); |
|
367 | 5 | $indexInOld++; |
|
368 | } |
||
369 | |||
370 | 8 | $this->setInnerHtml($newListNode, $output); |
|
371 | 8 | $this->appendClassToNode($newListNode, 'diff-list'); |
|
372 | |||
373 | 8 | return $newListNode->ownerDocument->saveHTML($newListNode); |
|
374 | } |
||
375 | |||
376 | 8 | protected function appendClassToNode(DOMElement $node, string $class) |
|
377 | { |
||
378 | 8 | $node->setAttribute( |
|
379 | 8 | 'class', |
|
380 | 8 | trim(sprintf('%s %s', $node->getAttribute('class'), $class)) |
|
381 | ); |
||
382 | 8 | } |
|
383 | |||
384 | 8 | private function getOuterText(DOMNode $node) : string |
|
385 | { |
||
386 | 8 | return $node->ownerDocument->saveHTML($node); |
|
387 | } |
||
388 | |||
389 | 8 | private function getInnerHtml(DOMNode $node) : string |
|
390 | { |
||
391 | 8 | $bufferDom = new DOMDocument('1.0', 'UTF-8'); |
|
392 | |||
393 | 8 | foreach($node->childNodes as $childNode) |
|
0 ignored issues
–
show
Coding Style
introduced
by
Loading history...
|
|||
394 | { |
||
395 | 8 | $bufferDom->appendChild($bufferDom->importNode($childNode, true)); |
|
396 | } |
||
397 | |||
398 | 8 | return trim($bufferDom->saveHTML()); |
|
399 | } |
||
400 | |||
401 | 8 | private function setInnerHtml(DOMNode $node, string $html) : void |
|
402 | { |
||
403 | 8 | $html = sprintf('<%s>%s</%s>', 'body', $html, 'body'); |
|
404 | 8 | $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'); |
|
405 | |||
406 | 8 | $node->nodeValue = ''; |
|
407 | |||
408 | 8 | $bufferDom = new DOMDocument('1.0', 'UTF-8'); |
|
409 | 8 | $bufferDom->loadHTML($html); |
|
410 | |||
411 | 8 | $bodyNode = $bufferDom->getElementsByTagName('body')->item(0); |
|
412 | |||
413 | 8 | foreach ($bodyNode->childNodes as $childNode) { |
|
414 | 8 | $node->appendChild($node->ownerDocument->importNode($childNode, true)); |
|
415 | } |
||
416 | |||
417 | 8 | $this->nodeCache = []; |
|
418 | 8 | } |
|
419 | |||
420 | 6 | private function wrapNodeContent(DOMElement $node, string $tagName) : void |
|
421 | { |
||
422 | 6 | $childNodes = []; |
|
423 | |||
424 | 6 | foreach ($node->childNodes as $childNode) { |
|
425 | 6 | $childNodes[] = $childNode; |
|
426 | } |
||
427 | |||
428 | 6 | $wrapNode = $node->ownerDocument->createElement($tagName); |
|
429 | |||
430 | 6 | $node->appendChild($wrapNode); |
|
431 | |||
432 | 6 | foreach ($childNodes as $childNode) { |
|
433 | 6 | $wrapNode->appendChild($childNode); |
|
434 | } |
||
435 | 6 | } |
|
436 | |||
437 | 8 | private function childCountWithoutTextNode(DOMNode $node) : int |
|
438 | { |
||
439 | 8 | $counter = 0; |
|
440 | |||
441 | 8 | foreach ($node->childNodes as $childNode) { |
|
442 | 8 | if ($childNode instanceof DOMText) { |
|
443 | 8 | continue; |
|
444 | } |
||
445 | |||
446 | 8 | $counter++; |
|
447 | } |
||
448 | |||
449 | 8 | return $counter; |
|
450 | } |
||
451 | |||
452 | 8 | private function getChildNodeByIndex(DOMNode $node, int $index) : DOMElement |
|
453 | { |
||
454 | 8 | $nodeHash = spl_object_hash($node); |
|
455 | |||
456 | 8 | if (isset($this->nodeCache[$nodeHash]) === true) { |
|
457 | 4 | return $this->nodeCache[$nodeHash][$index]; |
|
458 | } |
||
459 | |||
460 | 8 | $listCache[$nodeHash] = []; |
|
461 | |||
462 | 8 | foreach ($node->childNodes as $childNode) { |
|
463 | 8 | if ($childNode instanceof DOMText === false) { |
|
464 | 8 | $this->nodeCache[$nodeHash][] = $childNode; |
|
465 | } |
||
466 | } |
||
467 | |||
468 | 8 | return $this->nodeCache[$nodeHash][$index]; |
|
469 | } |
||
470 | } |
||
471 |