snebes /
php-daisydiff
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * (c) Steve Nebes <[email protected]> |
||
| 4 | * |
||
| 5 | * For the full copyright and license information, please view the LICENSE |
||
| 6 | * file that was distributed with this source code. |
||
| 7 | */ |
||
| 8 | |||
| 9 | declare(strict_types=1); |
||
| 10 | |||
| 11 | namespace SN\DaisyDiff\Html; |
||
| 12 | |||
| 13 | use ArrayIterator; |
||
| 14 | use SN\DaisyDiff\Html\Ancestor\AncestorComparator; |
||
| 15 | use SN\DaisyDiff\Html\Ancestor\AncestorComparatorResult; |
||
| 16 | use SN\DaisyDiff\Html\Dom\BodyNode; |
||
| 17 | use SN\DaisyDiff\Html\Dom\DomTreeBuilder; |
||
| 18 | use SN\DaisyDiff\Html\Dom\Helper\LastCommonParentResult; |
||
| 19 | use SN\DaisyDiff\Html\Dom\TagNode; |
||
| 20 | use SN\DaisyDiff\Html\Dom\TextNode; |
||
| 21 | use SN\DaisyDiff\Html\Modification\Modification; |
||
| 22 | use SN\DaisyDiff\Html\Modification\ModificationType; |
||
| 23 | use SN\DaisyDiff\RangeDifferencer\RangeComparatorInterface; |
||
| 24 | use IteratorAggregate; |
||
| 25 | |||
| 26 | /** |
||
| 27 | * A comparator that generates a DOM tree of sorts from handling SAX events. Then it can be used to compute the |
||
| 28 | * differences between DOM trees and mark elements accordingly. |
||
| 29 | */ |
||
| 30 | class TextNodeComparator implements RangeComparatorInterface, IteratorAggregate |
||
| 31 | { |
||
| 32 | /** @var TextNode[] */ |
||
| 33 | public $textNodes = []; |
||
| 34 | |||
| 35 | /** @var Modification[] */ |
||
| 36 | private $lastModified = []; |
||
| 37 | |||
| 38 | /** @var BodyNode */ |
||
| 39 | private $bodyNode; |
||
| 40 | |||
| 41 | /** @var int */ |
||
| 42 | private $newId = 0; |
||
| 43 | |||
| 44 | /** @var int */ |
||
| 45 | private $changedId = 0; |
||
| 46 | |||
| 47 | /** @var int */ |
||
| 48 | private $deletedId = 0; |
||
| 49 | |||
| 50 | /** @var bool */ |
||
| 51 | private $changedIdUsed = false; |
||
| 52 | |||
| 53 | /** @var bool */ |
||
| 54 | private $whiteAfterLastChangedPart = false; |
||
| 55 | |||
| 56 | /** |
||
| 57 | * Default values. |
||
| 58 | * |
||
| 59 | * @param DomTreeBuilder $domTreeBuilder |
||
| 60 | */ |
||
| 61 | 66 | public function __construct(DomTreeBuilder $domTreeBuilder) |
|
| 62 | { |
||
| 63 | 66 | $this->textNodes = $domTreeBuilder->getTextNodes(); |
|
| 64 | 66 | $this->bodyNode = $domTreeBuilder->getBodyNode(); |
|
| 65 | 66 | } |
|
| 66 | |||
| 67 | /** |
||
| 68 | * @return BodyNode |
||
| 69 | */ |
||
| 70 | 53 | public function getBodyNode(): BodyNode |
|
| 71 | { |
||
| 72 | 53 | return $this->bodyNode; |
|
| 73 | } |
||
| 74 | |||
| 75 | /** |
||
| 76 | * @return int |
||
| 77 | */ |
||
| 78 | 52 | public function getRangeCount(): int |
|
| 79 | { |
||
| 80 | 52 | return \count($this->textNodes); |
|
| 81 | } |
||
| 82 | |||
| 83 | /** |
||
| 84 | * @param int $index |
||
| 85 | * @return TextNode |
||
| 86 | * |
||
| 87 | * @throws \OutOfBoundsException |
||
| 88 | */ |
||
| 89 | 58 | public function getTextNode(int $index): TextNode |
|
| 90 | { |
||
| 91 | 58 | if (isset($this->textNodes[$index])) { |
|
| 92 | 57 | return $this->textNodes[$index]; |
|
| 93 | } |
||
| 94 | |||
| 95 | 1 | throw new \OutOfBoundsException(); |
|
| 96 | } |
||
| 97 | |||
| 98 | /** |
||
| 99 | * Marks the given range as new. In the output, the range will be formatted as specified by the anOutputFormat |
||
| 100 | * parameter. |
||
| 101 | * |
||
| 102 | * @param int $start |
||
| 103 | * @param int $end |
||
| 104 | * @param string $outputFormat |
||
| 105 | */ |
||
| 106 | 35 | public function markAsNew(int $start, int $end, string $outputFormat = ModificationType::ADDED): void |
|
| 107 | { |
||
| 108 | 35 | if ($end <= $start) { |
|
| 109 | 12 | return; |
|
| 110 | } |
||
| 111 | |||
| 112 | 24 | if ($this->whiteAfterLastChangedPart) { |
|
| 113 | 8 | $this->getTextNode($start)->setWhiteBefore(false); |
|
| 114 | } |
||
| 115 | |||
| 116 | /** @var Modification[] */ |
||
| 117 | 24 | $nextLastModified = []; |
|
| 118 | |||
| 119 | 24 | for ($i = $start; $i < $end; $i++) { |
|
| 120 | 24 | $mod = new Modification(ModificationType::ADDED, $outputFormat); |
|
| 121 | 24 | $mod->setId($this->newId); |
|
| 122 | |||
| 123 | 24 | if (\count($this->lastModified) > 0) { |
|
| 124 | 14 | $mod->setPrevious($this->lastModified[0]); |
|
| 125 | |||
| 126 | 14 | if (null === $this->lastModified[0]->getNext()) { |
|
| 127 | 14 | foreach ($this->lastModified as $lastMod) { |
|
| 128 | 14 | $lastMod->setNext($mod); |
|
| 129 | } |
||
| 130 | } |
||
| 131 | } |
||
| 132 | |||
| 133 | 24 | $nextLastModified[] = $mod; |
|
| 134 | 24 | $this->getTextNode($i)->setModification($mod); |
|
| 135 | } |
||
| 136 | |||
| 137 | 24 | $this->getTextNode($start)->getModification()->setFirstOfId(true); |
|
| 138 | 24 | $this->newId++; |
|
| 139 | 24 | $this->lastModified = $nextLastModified; |
|
| 140 | 24 | } |
|
| 141 | |||
| 142 | /** |
||
| 143 | * {@inheritdoc} |
||
| 144 | */ |
||
| 145 | 45 | public function rangesEqual(int $thisIndex, RangeComparatorInterface $other, int $otherIndex): bool |
|
| 146 | { |
||
| 147 | 45 | if ($other instanceof TextNodeComparator) { |
|
| 148 | 45 | return $this->getTextNode($thisIndex)->isSameText($other->getTextNode($otherIndex)); |
|
| 149 | } |
||
| 150 | |||
| 151 | return false; // @codeCoverageIgnore |
||
| 152 | } |
||
| 153 | |||
| 154 | /** |
||
| 155 | * {@inheritdoc} |
||
| 156 | */ |
||
| 157 | 32 | public function skipRangeComparison(int $length, int $maxLength, RangeComparatorInterface $other): bool |
|
| 158 | { |
||
| 159 | 32 | return false; |
|
| 160 | } |
||
| 161 | |||
| 162 | /** |
||
| 163 | * @param int $leftStart |
||
| 164 | * @param int $leftEnd |
||
| 165 | * @param int $rightStart |
||
| 166 | * @param int $rightEnd |
||
| 167 | * @param TextNodeComparator $leftComparator |
||
| 168 | */ |
||
| 169 | 43 | public function handlePossibleChangedPart( |
|
| 170 | int $leftStart, |
||
| 171 | int $leftEnd, |
||
| 172 | int $rightStart, |
||
| 173 | int $rightEnd, |
||
| 174 | TextNodeComparator $leftComparator |
||
| 175 | ): void { |
||
| 176 | // $leftEnd is not used below. |
||
| 177 | 43 | \assert(\is_int($leftEnd)); |
|
| 178 | |||
| 179 | 43 | $i = $rightStart; |
|
| 180 | 43 | $j = $leftStart; |
|
| 181 | |||
| 182 | 43 | if ($this->changedIdUsed) { |
|
| 183 | 5 | $this->changedId++; |
|
| 184 | 5 | $this->changedIdUsed = false; |
|
| 185 | } |
||
| 186 | |||
| 187 | /** @var Modification[] */ |
||
| 188 | 43 | $nextLastModified = []; |
|
| 189 | 43 | $changes = ''; |
|
| 190 | |||
| 191 | 43 | while ($i < $rightEnd) { |
|
| 192 | 43 | $acThis = new AncestorComparator($this->getTextNode($i)->getParentTree()); |
|
| 193 | 43 | $acOther = new AncestorComparator($leftComparator->getTextNode($j)->getParentTree()); |
|
| 194 | |||
| 195 | /** @var AncestorComparatorResult */ |
||
| 196 | 43 | $result = $acThis->getResult($acOther); |
|
| 197 | |||
| 198 | 43 | if ($result->isChanged()) { |
|
| 199 | 20 | $mod = new Modification(ModificationType::CHANGED, ModificationType::CHANGED); |
|
| 200 | |||
| 201 | 20 | if (!$this->changedIdUsed) { |
|
| 202 | 20 | $mod->setFirstOfId(true); |
|
| 203 | |||
| 204 | 20 | if (\count($nextLastModified) > 0) { |
|
| 205 | 3 | $this->lastModified = $nextLastModified; |
|
| 206 | 20 | $nextLastModified = []; |
|
| 207 | } |
||
| 208 | 10 | } elseif (!empty($result->getChanges()) && $changes !== $result->getChanges()) { |
|
| 209 | 5 | $this->changedId++; |
|
| 210 | 5 | $mod->setFirstOfId(true); |
|
| 211 | |||
| 212 | 5 | if (\count($nextLastModified) > 0) { |
|
| 213 | 5 | $this->lastModified = $nextLastModified; |
|
| 214 | 5 | $nextLastModified = []; |
|
| 215 | } |
||
| 216 | } |
||
| 217 | |||
| 218 | 20 | if (\count($this->lastModified) > 0) { |
|
| 219 | 11 | $mod->setPrevious($this->lastModified[0]); |
|
| 220 | |||
| 221 | 11 | if (null === $this->lastModified[0]->getNext()) { |
|
| 222 | 11 | foreach ($this->lastModified as $lastMod) { |
|
| 223 | 11 | $lastMod->setNext($mod); |
|
| 224 | } |
||
| 225 | } |
||
| 226 | } |
||
| 227 | |||
| 228 | 20 | $nextLastModified[] = $mod; |
|
| 229 | |||
| 230 | 20 | $mod->setChanges($result->getChanges()); |
|
| 231 | 20 | $mod->setHtmlLayoutChanges($result->getHtmlLayoutChanges()); |
|
| 232 | 20 | $mod->setId($this->changedId); |
|
| 233 | |||
| 234 | 20 | $this->getTextNode($i)->setModification($mod); |
|
| 235 | 20 | $changes = $result->getChanges(); |
|
| 236 | 20 | $this->changedIdUsed = true; |
|
| 237 | 37 | } elseif ($this->changedIdUsed) { |
|
| 238 | 13 | $this->changedId++; |
|
| 239 | 13 | $this->changedIdUsed = false; |
|
| 240 | } |
||
| 241 | |||
| 242 | 43 | $i++; |
|
| 243 | 43 | $j++; |
|
| 244 | } |
||
| 245 | |||
| 246 | 43 | if (\count($nextLastModified) > 0) { |
|
| 247 | 20 | $this->lastModified = $nextLastModified; |
|
| 248 | } |
||
| 249 | 43 | } |
|
| 250 | |||
| 251 | /** |
||
| 252 | * Marks the given range as deleted. In the output, the range will be formatted as specified by the parameter |
||
| 253 | * anOutputFormat. |
||
| 254 | * |
||
| 255 | * @param int $start |
||
| 256 | * @param int $end |
||
| 257 | * @param TextNodeComparator $oldComp |
||
| 258 | * @param int $before |
||
| 259 | * @param string $outputFormat |
||
| 260 | */ |
||
| 261 | 24 | public function markAsDeleted( |
|
| 262 | int $start, |
||
| 263 | int $end, |
||
| 264 | TextNodeComparator $oldComp, |
||
| 265 | int $before, |
||
| 266 | string $outputFormat = ModificationType::REMOVED |
||
| 267 | ): void |
||
| 268 | { |
||
| 269 | 24 | if ($end <= $start) { |
|
| 270 | 1 | return; |
|
| 271 | } |
||
| 272 | |||
| 273 | 23 | if ($before > 0 && $this->getTextNode($before - 1)->isWhiteAfter()) { |
|
| 274 | 14 | $this->whiteAfterLastChangedPart = true; |
|
| 275 | } else { |
||
| 276 | 10 | $this->whiteAfterLastChangedPart = false; |
|
| 277 | } |
||
| 278 | |||
| 279 | /** @var Modification[] */ |
||
| 280 | 23 | $nextLastModified = []; |
|
| 281 | |||
| 282 | 23 | for ($i = $start; $i < $end; $i++) { |
|
| 283 | 23 | $mod = new Modification(ModificationType::REMOVED, $outputFormat); |
|
| 284 | 23 | $mod->setId($this->deletedId); |
|
| 285 | |||
| 286 | 23 | if (\count($this->lastModified) > 0) { |
|
| 287 | 5 | $mod->setPrevious($this->lastModified[0]); |
|
| 288 | |||
| 289 | 5 | if (null === $this->lastModified[0]->getNext()) { |
|
| 290 | 5 | foreach ($this->lastModified as $lastMod) { |
|
| 291 | 5 | $lastMod->setNext($mod); |
|
| 292 | } |
||
| 293 | } |
||
| 294 | } |
||
| 295 | |||
| 296 | 23 | $nextLastModified[] = $mod; |
|
| 297 | |||
| 298 | // $oldComp is used here because we're going to move its deleted elements to this tree. |
||
| 299 | 23 | $oldComp->getTextNode($i)->setModification($mod); |
|
| 300 | } |
||
| 301 | |||
| 302 | 23 | $oldComp->getTextNode($start)->getModification()->setFirstOfId(true); |
|
| 303 | |||
| 304 | /** @var TagNode[] $deletedNodes */ |
||
| 305 | 23 | $deletedNodes = $oldComp->getBodyNode()->getMinimalDeletedSet($this->deletedId); |
|
| 306 | |||
| 307 | // Set $prevLeaf to the leaf after which the old HTML needs to be inserted. |
||
| 308 | 23 | $prevLeaf = null; |
|
| 309 | |||
| 310 | 23 | if ($before > 0) { |
|
| 311 | 19 | $prevLeaf = $this->getTextNode($before - 1); |
|
| 312 | } |
||
| 313 | |||
| 314 | // Set $nextLeaf to the leaf before which the old HTML needs to be inserted. |
||
| 315 | 23 | $nextLeaf = null; |
|
| 316 | |||
| 317 | 23 | if ($before < $this->getRangeCount()) { |
|
| 318 | 21 | $nextLeaf = $this->getTextNode($before); |
|
| 319 | } |
||
| 320 | |||
| 321 | 23 | while (\count($deletedNodes) > 0) { |
|
| 322 | 21 | $prevResult = null; |
|
| 323 | 21 | $nextResult = null; |
|
| 324 | |||
| 325 | 21 | if (null !== $prevLeaf) { |
|
| 326 | 17 | $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]); |
|
| 327 | } else { |
||
| 328 | 4 | $prevResult = new LastCommonParentResult(); |
|
| 329 | 4 | $prevResult->setLastCommonParent($this->getBodyNode()); |
|
| 330 | 4 | $prevResult->setIndexInLastCommonParent(-1); |
|
| 331 | } |
||
| 332 | |||
| 333 | 21 | if (null !== $nextLeaf) { |
|
| 334 | 19 | $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[\count($deletedNodes) - 1]); |
|
| 335 | } else { |
||
| 336 | 2 | $nextResult = new LastCommonParentResult(); |
|
| 337 | 2 | $nextResult->setLastCommonParent($this->getBodyNode()); |
|
| 338 | 2 | $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNumChildren()); |
|
| 339 | } |
||
| 340 | |||
| 341 | 21 | if ($prevResult->getLastCommonParentDepth() === $nextResult->getLastCommonParentDepth()) { |
|
| 342 | // We need some metric to choose which way to add... |
||
| 343 | if ( |
||
| 344 | 18 | $deletedNodes[0]->getParent() === $deletedNodes[\count($deletedNodes) - 1]->getParent() && |
|
| 345 | 18 | $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent() |
|
| 346 | ) { |
||
| 347 | // The difference is not in the parent. |
||
| 348 | 18 | $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); |
|
| 349 | } else { |
||
| 350 | // The difference is in the parent, so compare them. now THIS is tricky. |
||
| 351 | 1 | $distancePrev = $deletedNodes[0] |
|
| 352 | 1 | ->getParent() |
|
| 353 | 1 | ->getMatchRatio($prevResult->getLastCommonParent()); |
|
| 354 | 1 | $distanceNext = $deletedNodes[\count($deletedNodes) - 1] |
|
| 355 | 1 | ->getParent() |
|
| 356 | 1 | ->getMatchRatio($nextResult->getLastCommonParent()); |
|
| 357 | |||
| 358 | 1 | if ($distancePrev <= $distanceNext) { |
|
| 359 | // Insert after the previous node. |
||
| 360 | $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1); |
||
| 361 | } else { |
||
| 362 | // Insert before the next node. |
||
| 363 | 1 | $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1); |
|
| 364 | } |
||
| 365 | } |
||
| 366 | } |
||
| 367 | |||
| 368 | 21 | if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) { |
|
| 369 | // Inserting at the front. |
||
| 370 | 18 | if ($prevResult->isSplittingNeeded()) { |
|
| 371 | 1 | $prevLeaf->getParent()->splitUntil($prevResult->getLastCommonParent(), $prevLeaf, true); |
|
|
0 ignored issues
–
show
Bug
introduced
by
Loading history...
|
|||
| 372 | } |
||
| 373 | |||
| 374 | // array_shift removes first array element, and returns it. |
||
| 375 | 18 | $node = \array_shift($deletedNodes); |
|
| 376 | 18 | $prevLeaf = $node->copyTree(); |
|
| 377 | 18 | $prevLeaf->setParent($prevResult->getLastCommonParent()); |
|
| 378 | 18 | $prevResult->getLastCommonParent()->addChild($prevLeaf, $prevResult->getIndexInLastCommonParent() + 1); |
|
| 379 | 4 | } elseif ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) { |
|
| 380 | // Inserting at the back. |
||
| 381 | 4 | if ($nextResult->isSplittingNeeded()) { |
|
| 382 | $splitOccurred = $nextLeaf |
||
| 383 | 2 | ->getParent() |
|
| 384 | 2 | ->splitUntil($nextResult->getLastCommonParent(), $nextLeaf, false); |
|
| 385 | |||
| 386 | 2 | if ($splitOccurred) { |
|
| 387 | // The place where to insert is shifted one place to the right. |
||
| 388 | $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1); |
||
| 389 | } |
||
| 390 | } |
||
| 391 | |||
| 392 | // array_pop removes last array element, and returns it. |
||
| 393 | 4 | $node = \array_pop($deletedNodes); |
|
| 394 | 4 | $nextLeaf = $node->copyTree(); |
|
| 395 | 4 | $nextLeaf->setParent($nextResult->getLastCommonParent()); |
|
| 396 | 4 | $nextResult->getLastCommonParent()->addChild($nextLeaf, $nextResult->getIndexInLastCommonParent()); |
|
| 397 | } else { |
||
| 398 | throw new \RuntimeException(); |
||
| 399 | } |
||
| 400 | } |
||
| 401 | |||
| 402 | 23 | $this->lastModified = $nextLastModified; |
|
| 403 | 23 | $this->deletedId++; |
|
| 404 | 23 | } |
|
| 405 | |||
| 406 | /** |
||
| 407 | * @return void |
||
| 408 | */ |
||
| 409 | 49 | public function expandWhiteSpace(): void |
|
| 410 | { |
||
| 411 | 49 | $this->getBodyNode()->expandWhiteSpace(); |
|
| 412 | 49 | } |
|
| 413 | |||
| 414 | /** |
||
| 415 | * @return ArrayIterator |
||
| 416 | */ |
||
| 417 | 1 | public function getIterator(): ArrayIterator |
|
| 418 | { |
||
| 419 | 1 | return new ArrayIterator($this->textNodes); |
|
| 420 | } |
||
| 421 | |||
| 422 | /** |
||
| 423 | * @codeCoverageIgnore |
||
| 424 | * @deprecated Not used, and will not be used in the future. |
||
| 425 | * |
||
| 426 | * Used for combining multiple comparators in order to create a single output document. The IDs must be successive |
||
| 427 | * along the different comparators. |
||
| 428 | * |
||
| 429 | * @param int $value |
||
| 430 | */ |
||
| 431 | public function setStartDeletedId(int $value): void |
||
| 432 | { |
||
| 433 | $this->deletedId = $value; |
||
| 434 | } |
||
| 435 | |||
| 436 | /** |
||
| 437 | * @codeCoverageIgnore |
||
| 438 | * @deprecated Not used, and will not be used in the future. |
||
| 439 | * |
||
| 440 | * Used for combining multiple comparators in order to create a single output document. The IDs must be successive |
||
| 441 | * along the different comparators. |
||
| 442 | * |
||
| 443 | * @param int $value |
||
| 444 | */ |
||
| 445 | public function setStartChangedId(int $value): void |
||
| 446 | { |
||
| 447 | $this->changedId = $value; |
||
| 448 | } |
||
| 449 | |||
| 450 | /** |
||
| 451 | * @codeCoverageIgnore |
||
| 452 | * @deprecated Not used, and will not be used in the future. |
||
| 453 | * |
||
| 454 | * Used for combining multiple comparators in order to create a single output document. The IDs must be successive |
||
| 455 | * along the different comparators. |
||
| 456 | * |
||
| 457 | * @param int $value |
||
| 458 | */ |
||
| 459 | public function setStartNewId(int $value): void |
||
| 460 | { |
||
| 461 | $this->newId = $value; |
||
| 462 | } |
||
| 463 | |||
| 464 | /** |
||
| 465 | * @return int |
||
| 466 | */ |
||
| 467 | 1 | public function getChangedId(): int |
|
| 468 | { |
||
| 469 | 1 | return $this->changedId; |
|
| 470 | } |
||
| 471 | |||
| 472 | /** |
||
| 473 | * @return int |
||
| 474 | */ |
||
| 475 | 1 | public function getDeletedId(): int |
|
| 476 | { |
||
| 477 | 1 | return $this->deletedId; |
|
| 478 | } |
||
| 479 | |||
| 480 | /** |
||
| 481 | * @return int |
||
| 482 | */ |
||
| 483 | 1 | public function getNewId(): int |
|
| 484 | { |
||
| 485 | 1 | return $this->newId; |
|
| 486 | } |
||
| 487 | |||
| 488 | /** |
||
| 489 | * @return Modification[] |
||
| 490 | */ |
||
| 491 | 10 | public function getLastModified(): array |
|
| 492 | { |
||
| 493 | 10 | return $this->lastModified; |
|
| 494 | } |
||
| 495 | |||
| 496 | /** |
||
| 497 | * @param Modification[] $lastModified |
||
| 498 | */ |
||
| 499 | 3 | public function setLastModified(array $lastModified): void |
|
| 500 | { |
||
| 501 | 3 | $this->lastModified = $lastModified; |
|
| 502 | 3 | } |
|
| 503 | } |
||
| 504 |