1 | <?php |
||
2 | /** |
||
3 | * (c) Steve Nebes <[email protected]> |
||
4 | * |
||
5 | * For the full copyright and license information, please view the LICENSE |
||
6 | * file that was distributed with this source code. |
||
7 | */ |
||
8 | |||
9 | declare(strict_types=1); |
||
10 | |||
11 | namespace SN\DaisyDiff\Html; |
||
12 | |||
13 | use ArrayIterator; |
||
14 | use SN\DaisyDiff\Html\Ancestor\AncestorComparator; |
||
15 | use SN\DaisyDiff\Html\Ancestor\AncestorComparatorResult; |
||
16 | use SN\DaisyDiff\Html\Dom\BodyNode; |
||
17 | use SN\DaisyDiff\Html\Dom\DomTreeBuilder; |
||
18 | use SN\DaisyDiff\Html\Dom\Helper\LastCommonParentResult; |
||
19 | use SN\DaisyDiff\Html\Dom\TagNode; |
||
20 | use SN\DaisyDiff\Html\Dom\TextNode; |
||
21 | use SN\DaisyDiff\Html\Modification\Modification; |
||
22 | use SN\DaisyDiff\Html\Modification\ModificationType; |
||
23 | use SN\DaisyDiff\RangeDifferencer\RangeComparatorInterface; |
||
24 | use IteratorAggregate; |
||
25 | |||
26 | /** |
||
27 | * A comparator that generates a DOM tree of sorts from handling SAX events. Then it can be used to compute the |
||
28 | * differences between DOM trees and mark elements accordingly. |
||
29 | */ |
||
30 | class TextNodeComparator implements RangeComparatorInterface, IteratorAggregate |
||
31 | { |
||
32 | /** @var TextNode[] */ |
||
33 | public $textNodes = []; |
||
34 | |||
35 | /** @var Modification[] */ |
||
36 | private $lastModified = []; |
||
37 | |||
38 | /** @var BodyNode */ |
||
39 | private $bodyNode; |
||
40 | |||
41 | /** @var int */ |
||
42 | private $newId = 0; |
||
43 | |||
44 | /** @var int */ |
||
45 | private $changedId = 0; |
||
46 | |||
47 | /** @var int */ |
||
48 | private $deletedId = 0; |
||
49 | |||
50 | /** @var bool */ |
||
51 | private $changedIdUsed = false; |
||
52 | |||
53 | /** @var bool */ |
||
54 | private $whiteAfterLastChangedPart = false; |
||
55 | |||
56 | /** |
||
57 | * Default values. |
||
58 | * |
||
59 | * @param DomTreeBuilder $domTreeBuilder |
||
60 | */ |
||
61 | 66 | public function __construct(DomTreeBuilder $domTreeBuilder) |
|
62 | { |
||
63 | 66 | $this->textNodes = $domTreeBuilder->getTextNodes(); |
|
64 | 66 | $this->bodyNode = $domTreeBuilder->getBodyNode(); |
|
65 | 66 | } |
|
66 | |||
67 | /** |
||
68 | * @return BodyNode |
||
69 | */ |
||
70 | 53 | public function getBodyNode(): BodyNode |
|
71 | { |
||
72 | 53 | return $this->bodyNode; |
|
73 | } |
||
74 | |||
75 | /** |
||
76 | * @return int |
||
77 | */ |
||
78 | 52 | public function getRangeCount(): int |
|
79 | { |
||
80 | 52 | return \count($this->textNodes); |
|
81 | } |
||
82 | |||
83 | /** |
||
84 | * @param int $index |
||
85 | * @return TextNode |
||
86 | * |
||
87 | * @throws \OutOfBoundsException |
||
88 | */ |
||
89 | 58 | public function getTextNode(int $index): TextNode |
|
90 | { |
||
91 | 58 | if (isset($this->textNodes[$index])) { |
|
92 | 57 | return $this->textNodes[$index]; |
|
93 | } |
||
94 | |||
95 | 1 | throw new \OutOfBoundsException(); |
|
96 | } |
||
97 | |||
98 | /** |
||
99 | * Marks the given range as new. In the output, the range will be formatted as specified by the anOutputFormat |
||
100 | * parameter. |
||
101 | * |
||
102 | * @param int $start |
||
103 | * @param int $end |
||
104 | * @param string $outputFormat |
||
105 | */ |
||
106 | 35 | public function markAsNew(int $start, int $end, string $outputFormat = ModificationType::ADDED): void |
|
107 | { |
||
108 | 35 | if ($end <= $start) { |
|
109 | 12 | return; |
|
110 | } |
||
111 | |||
112 | 24 | if ($this->whiteAfterLastChangedPart) { |
|
113 | 8 | $this->getTextNode($start)->setWhiteBefore(false); |
|
114 | } |
||
115 | |||
116 | /** @var Modification[] */ |
||
117 | 24 | $nextLastModified = []; |
|
118 | |||
119 | 24 | for ($i = $start; $i < $end; $i++) { |
|
120 | 24 | $mod = new Modification(ModificationType::ADDED, $outputFormat); |
|
121 | 24 | $mod->setId($this->newId); |
|
122 | |||
123 | 24 | if (\count($this->lastModified) > 0) { |
|
124 | 14 | $mod->setPrevious($this->lastModified[0]); |
|
125 | |||
126 | 14 | if (null === $this->lastModified[0]->getNext()) { |
|
127 | 14 | foreach ($this->lastModified as $lastMod) { |
|
128 | 14 | $lastMod->setNext($mod); |
|
129 | } |
||
130 | } |
||
131 | } |
||
132 | |||
133 | 24 | $nextLastModified[] = $mod; |
|
134 | 24 | $this->getTextNode($i)->setModification($mod); |
|
135 | } |
||
136 | |||
137 | 24 | $this->getTextNode($start)->getModification()->setFirstOfId(true); |
|
138 | 24 | $this->newId++; |
|
139 | 24 | $this->lastModified = $nextLastModified; |
|
140 | 24 | } |
|
141 | |||
142 | /** |
||
143 | * {@inheritdoc} |
||
144 | */ |
||
145 | 45 | public function rangesEqual(int $thisIndex, RangeComparatorInterface $other, int $otherIndex): bool |
|
146 | { |
||
147 | 45 | if ($other instanceof TextNodeComparator) { |
|
148 | 45 | return $this->getTextNode($thisIndex)->isSameText($other->getTextNode($otherIndex)); |
|
149 | } |
||
150 | |||
151 | return false; // @codeCoverageIgnore |
||
152 | } |
||
153 | |||
154 | /** |
||
155 | * {@inheritdoc} |
||
156 | */ |
||
157 | 32 | public function skipRangeComparison(int $length, int $maxLength, RangeComparatorInterface $other): bool |
|
158 | { |
||
159 | 32 | return false; |
|
160 | } |
||
161 | |||
162 | /** |
||
163 | * @param int $leftStart |
||
164 | * @param int $leftEnd |
||
165 | * @param int $rightStart |
||
166 | * @param int $rightEnd |
||
167 | * @param TextNodeComparator $leftComparator |
||
168 | */ |
||
169 | 43 | public function handlePossibleChangedPart( |
|
170 | int $leftStart, |
||
171 | int $leftEnd, |
||
172 | int $rightStart, |
||
173 | int $rightEnd, |
||
174 | TextNodeComparator $leftComparator |
||
175 | ): void { |
||
176 | // $leftEnd is not used below. |
||
177 | 43 | \assert(\is_int($leftEnd)); |
|
178 | |||
179 | 43 | $i = $rightStart; |
|
180 | 43 | $j = $leftStart; |
|
181 | |||
182 | 43 | if ($this->changedIdUsed) { |
|
183 | 5 | $this->changedId++; |
|
184 | 5 | $this->changedIdUsed = false; |
|
185 | } |
||
186 | |||
187 | /** @var Modification[] */ |
||
188 | 43 | $nextLastModified = []; |
|
189 | 43 | $changes = ''; |
|
190 | |||
191 | 43 | while ($i < $rightEnd) { |
|
192 | 43 | $acThis = new AncestorComparator($this->getTextNode($i)->getParentTree()); |
|
193 | 43 | $acOther = new AncestorComparator($leftComparator->getTextNode($j)->getParentTree()); |
|
194 | |||
195 | /** @var AncestorComparatorResult */ |
||
196 | 43 | $result = $acThis->getResult($acOther); |
|
197 | |||
198 | 43 | if ($result->isChanged()) { |
|
199 | 20 | $mod = new Modification(ModificationType::CHANGED, ModificationType::CHANGED); |
|
200 | |||
201 | 20 | if (!$this->changedIdUsed) { |
|
202 | 20 | $mod->setFirstOfId(true); |
|
203 | |||
204 | 20 | if (\count($nextLastModified) > 0) { |
|
205 | 3 | $this->lastModified = $nextLastModified; |
|
206 | 20 | $nextLastModified = []; |
|
207 | } |
||
208 | 10 | } elseif (!empty($result->getChanges()) && $changes !== $result->getChanges()) { |
|
209 | 5 | $this->changedId++; |
|
210 | 5 | $mod->setFirstOfId(true); |
|
211 | |||
212 | 5 | if (\count($nextLastModified) > 0) { |
|
213 | 5 | $this->lastModified = $nextLastModified; |
|
214 | 5 | $nextLastModified = []; |
|
215 | } |
||
216 | } |
||
217 | |||
218 | 20 | if (\count($this->lastModified) > 0) { |
|
219 | 11 | $mod->setPrevious($this->lastModified[0]); |
|
220 | |||
221 | 11 | if (null === $this->lastModified[0]->getNext()) { |
|
222 | 11 | foreach ($this->lastModified as $lastMod) { |
|
223 | 11 | $lastMod->setNext($mod); |
|
224 | } |
||
225 | } |
||
226 | } |
||
227 | |||
228 | 20 | $nextLastModified[] = $mod; |
|
229 | |||
230 | 20 | $mod->setChanges($result->getChanges()); |
|
231 | 20 | $mod->setHtmlLayoutChanges($result->getHtmlLayoutChanges()); |
|
232 | 20 | $mod->setId($this->changedId); |
|
233 | |||
234 | 20 | $this->getTextNode($i)->setModification($mod); |
|
235 | 20 | $changes = $result->getChanges(); |
|
236 | 20 | $this->changedIdUsed = true; |
|
237 | 37 | } elseif ($this->changedIdUsed) { |
|
238 | 13 | $this->changedId++; |
|
239 | 13 | $this->changedIdUsed = false; |
|
240 | } |
||
241 | |||
242 | 43 | $i++; |
|
243 | 43 | $j++; |
|
244 | } |
||
245 | |||
246 | 43 | if (\count($nextLastModified) > 0) { |
|
247 | 20 | $this->lastModified = $nextLastModified; |
|
248 | } |
||
249 | 43 | } |
|
250 | |||
251 | /** |
||
252 | * Marks the given range as deleted. In the output, the range will be formatted as specified by the parameter |
||
253 | * anOutputFormat. |
||
254 | * |
||
255 | * @param int $start |
||
256 | * @param int $end |
||
257 | * @param TextNodeComparator $oldComp |
||
258 | * @param int $before |
||
259 | * @param string $outputFormat |
||
260 | */ |
||
261 | 24 | public function markAsDeleted( |
|
262 | int $start, |
||
263 | int $end, |
||
264 | TextNodeComparator $oldComp, |
||
265 | int $before, |
||
266 | string $outputFormat = ModificationType::REMOVED |
||
267 | ): void |
||
268 | { |
||
269 | 24 | if ($end <= $start) { |
|
270 | 1 | return; |
|
271 | } |
||
272 | |||
273 | 23 | if ($before > 0 && $this->getTextNode($before - 1)->isWhiteAfter()) { |
|
274 | 14 | $this->whiteAfterLastChangedPart = true; |
|
275 | } else { |
||
276 | 10 | $this->whiteAfterLastChangedPart = false; |
|
277 | } |
||
278 | |||
279 | /** @var Modification[] */ |
||
280 | 23 | $nextLastModified = []; |
|
281 | |||
282 | 23 | for ($i = $start; $i < $end; $i++) { |
|
283 | 23 | $mod = new Modification(ModificationType::REMOVED, $outputFormat); |
|
284 | 23 | $mod->setId($this->deletedId); |
|
285 | |||
286 | 23 | if (\count($this->lastModified) > 0) { |
|
287 | 5 | $mod->setPrevious($this->lastModified[0]); |
|
288 | |||
289 | 5 | if (null === $this->lastModified[0]->getNext()) { |
|
290 | 5 | foreach ($this->lastModified as $lastMod) { |
|
291 | 5 | $lastMod->setNext($mod); |
|
292 | } |
||
293 | } |
||
294 | } |
||
295 | |||
296 | 23 | $nextLastModified[] = $mod; |
|
297 | |||
298 | // $oldComp is used here because we're going to move its deleted elements to this tree. |
||
299 | 23 | $oldComp->getTextNode($i)->setModification($mod); |
|
300 | } |
||
301 | |||
302 | 23 | $oldComp->getTextNode($start)->getModification()->setFirstOfId(true); |
|
303 | |||
304 | /** @var TagNode[] $deletedNodes */ |
||
305 | 23 | $deletedNodes = $oldComp->getBodyNode()->getMinimalDeletedSet($this->deletedId); |
|
306 | |||
307 | // Set $prevLeaf to the leaf after which the old HTML needs to be inserted. |
||
308 | 23 | $prevLeaf = null; |
|
309 | |||
310 | 23 | if ($before > 0) { |
|
311 | 19 | $prevLeaf = $this->getTextNode($before - 1); |
|
312 | } |
||
313 | |||
314 | // Set $nextLeaf to the leaf before which the old HTML needs to be inserted. |
||
315 | 23 | $nextLeaf = null; |
|
316 | |||
317 | 23 | if ($before < $this->getRangeCount()) { |
|
318 | 21 | $nextLeaf = $this->getTextNode($before); |
|
319 | } |
||
320 | |||
321 | 23 | while (\count($deletedNodes) > 0) { |
|
322 | 21 | $prevResult = null; |
|
323 | 21 | $nextResult = null; |
|
324 | |||
325 | 21 | if (null !== $prevLeaf) { |
|
326 | 17 | $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]); |
|
327 | } else { |
||
328 | 4 | $prevResult = new LastCommonParentResult(); |
|
329 | 4 | $prevResult->setLastCommonParent($this->getBodyNode()); |
|
330 | 4 | $prevResult->setIndexInLastCommonParent(-1); |
|
331 | } |
||
332 | |||
333 | 21 | if (null !== $nextLeaf) { |
|
334 | 19 | $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[\count($deletedNodes) - 1]); |
|
335 | } else { |
||
336 | 2 | $nextResult = new LastCommonParentResult(); |
|
337 | 2 | $nextResult->setLastCommonParent($this->getBodyNode()); |
|
338 | 2 | $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNumChildren()); |
|
339 | } |
||
340 | |||
341 | 21 | if ($prevResult->getLastCommonParentDepth() === $nextResult->getLastCommonParentDepth()) { |
|
342 | // We need some metric to choose which way to add... |
||
343 | if ( |
||
344 | 18 | $deletedNodes[0]->getParent() === $deletedNodes[\count($deletedNodes) - 1]->getParent() && |
|
345 | 18 | $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent() |
|
346 | ) { |
||
347 | // The difference is not in the parent. |
||
348 | 18 | $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); |
|
349 | } else { |
||
350 | // The difference is in the parent, so compare them. now THIS is tricky. |
||
351 | 1 | $distancePrev = $deletedNodes[0] |
|
352 | 1 | ->getParent() |
|
353 | 1 | ->getMatchRatio($prevResult->getLastCommonParent()); |
|
354 | 1 | $distanceNext = $deletedNodes[\count($deletedNodes) - 1] |
|
355 | 1 | ->getParent() |
|
356 | 1 | ->getMatchRatio($nextResult->getLastCommonParent()); |
|
357 | |||
358 | 1 | if ($distancePrev <= $distanceNext) { |
|
359 | // Insert after the previous node. |
||
360 | $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); |
||
361 | } else { |
||
362 | // Insert before the next node. |
||
363 | 1 | $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1); |
|
364 | } |
||
365 | } |
||
366 | } |
||
367 | |||
368 | 21 | if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) { |
|
369 | // Inserting at the front. |
||
370 | 18 | if ($prevResult->isSplittingNeeded()) { |
|
371 | 1 | $prevLeaf->getParent()->splitUntil($prevResult->getLastCommonParent(), $prevLeaf, true); |
|
0 ignored issues
–
show
Bug
introduced
by
![]() |
|||
372 | } |
||
373 | |||
374 | // array_shift removes first array element, and returns it. |
||
375 | 18 | $node = \array_shift($deletedNodes); |
|
376 | 18 | $prevLeaf = $node->copyTree(); |
|
377 | 18 | $prevLeaf->setParent($prevResult->getLastCommonParent()); |
|
378 | 18 | $prevResult->getLastCommonParent()->addChild($prevLeaf, $prevResult->getIndexInLastCommonParent() + 1); |
|
379 | 4 | } elseif ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) { |
|
380 | // Inserting at the back. |
||
381 | 4 | if ($nextResult->isSplittingNeeded()) { |
|
382 | $splitOccurred = $nextLeaf |
||
383 | 2 | ->getParent() |
|
384 | 2 | ->splitUntil($nextResult->getLastCommonParent(), $nextLeaf, false); |
|
385 | |||
386 | 2 | if ($splitOccurred) { |
|
387 | // The place where to insert is shifted one place to the right. |
||
388 | $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1); |
||
389 | } |
||
390 | } |
||
391 | |||
392 | // array_pop removes last array element, and returns it. |
||
393 | 4 | $node = \array_pop($deletedNodes); |
|
394 | 4 | $nextLeaf = $node->copyTree(); |
|
395 | 4 | $nextLeaf->setParent($nextResult->getLastCommonParent()); |
|
396 | 4 | $nextResult->getLastCommonParent()->addChild($nextLeaf, $nextResult->getIndexInLastCommonParent()); |
|
397 | } else { |
||
398 | throw new \RuntimeException(); |
||
399 | } |
||
400 | } |
||
401 | |||
402 | 23 | $this->lastModified = $nextLastModified; |
|
403 | 23 | $this->deletedId++; |
|
404 | 23 | } |
|
405 | |||
406 | /** |
||
407 | * @return void |
||
408 | */ |
||
409 | 49 | public function expandWhiteSpace(): void |
|
410 | { |
||
411 | 49 | $this->getBodyNode()->expandWhiteSpace(); |
|
412 | 49 | } |
|
413 | |||
414 | /** |
||
415 | * @return ArrayIterator |
||
416 | */ |
||
417 | 1 | public function getIterator(): ArrayIterator |
|
418 | { |
||
419 | 1 | return new ArrayIterator($this->textNodes); |
|
420 | } |
||
421 | |||
422 | /** |
||
423 | * @codeCoverageIgnore |
||
424 | * @deprecated Not used, and will not be used in the future. |
||
425 | * |
||
426 | * Used for combining multiple comparators in order to create a single output document. The IDs must be successive |
||
427 | * along the different comparators. |
||
428 | * |
||
429 | * @param int $value |
||
430 | */ |
||
431 | public function setStartDeletedId(int $value): void |
||
432 | { |
||
433 | $this->deletedId = $value; |
||
434 | } |
||
435 | |||
436 | /** |
||
437 | * @codeCoverageIgnore |
||
438 | * @deprecated Not used, and will not be used in the future. |
||
439 | * |
||
440 | * Used for combining multiple comparators in order to create a single output document. The IDs must be successive |
||
441 | * along the different comparators. |
||
442 | * |
||
443 | * @param int $value |
||
444 | */ |
||
445 | public function setStartChangedId(int $value): void |
||
446 | { |
||
447 | $this->changedId = $value; |
||
448 | } |
||
449 | |||
450 | /** |
||
451 | * @codeCoverageIgnore |
||
452 | * @deprecated Not used, and will not be used in the future. |
||
453 | * |
||
454 | * Used for combining multiple comparators in order to create a single output document. The IDs must be successive |
||
455 | * along the different comparators. |
||
456 | * |
||
457 | * @param int $value |
||
458 | */ |
||
459 | public function setStartNewId(int $value): void |
||
460 | { |
||
461 | $this->newId = $value; |
||
462 | } |
||
463 | |||
464 | /** |
||
465 | * @return int |
||
466 | */ |
||
467 | 1 | public function getChangedId(): int |
|
468 | { |
||
469 | 1 | return $this->changedId; |
|
470 | } |
||
471 | |||
472 | /** |
||
473 | * @return int |
||
474 | */ |
||
475 | 1 | public function getDeletedId(): int |
|
476 | { |
||
477 | 1 | return $this->deletedId; |
|
478 | } |
||
479 | |||
480 | /** |
||
481 | * @return int |
||
482 | */ |
||
483 | 1 | public function getNewId(): int |
|
484 | { |
||
485 | 1 | return $this->newId; |
|
486 | } |
||
487 | |||
488 | /** |
||
489 | * @return Modification[] |
||
490 | */ |
||
491 | 10 | public function getLastModified(): array |
|
492 | { |
||
493 | 10 | return $this->lastModified; |
|
494 | } |
||
495 | |||
496 | /** |
||
497 | * @param Modification[] $lastModified |
||
498 | */ |
||
499 | 3 | public function setLastModified(array $lastModified): void |
|
500 | { |
||
501 | 3 | $this->lastModified = $lastModified; |
|
502 | 3 | } |
|
503 | } |
||
504 |