1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Caxy\HtmlDiff; |
4
|
|
|
|
5
|
|
|
use Caxy\HtmlDiff\Strategy\ListItemMatchStrategy; |
6
|
|
|
use Sunra\PhpSimple\HtmlDomParser; |
7
|
|
|
|
8
|
|
|
/**
 * Diffs two HTML lists item by item.
 *
 * The first list element (ul/ol/dl) found in each input is located, the items
 * of both lists are aligned with an LCS pass, and the old list is replayed with
 * added / deleted / changed items wrapped in <ins>/<del> and tagged with CSS
 * classes. Items kept in place are diffed individually through HtmlDiff.
 */
class ListDiffLines extends AbstractDiff
{
    const CLASS_LIST_ITEM_ADDED = 'normal new';
    const CLASS_LIST_ITEM_DELETED = 'removed';
    const CLASS_LIST_ITEM_CHANGED = 'replacement';
    const CLASS_LIST_ITEM_NONE = 'normal';

    /**
     * Tag names treated as list containers when searching for the list to diff.
     *
     * @var array
     */
    protected static $listTypes = ['ul', 'ol', 'dl'];

    /**
     * List of tags that should be included when retrieving
     * text from a single list item that will be used in
     * matching logic (and only in matching logic).
     *
     * @see getRelevantNodeText()
     *
     * @var array
     */
    protected static $listContentTags = [
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'br', 'hr', 'code',
        'input', 'form', 'img', 'span', 'a', 'i', 'b', 'strong', 'em',
        'font', 'big', 'del', 'tt', 'sub', 'sup', 'strike',
    ];

    /**
     * Created in build(); used by getListItemOperations() to align list items.
     *
     * @var LcsService
     */
    protected $lcsService;

    /**
     * Builds an instance and applies the optional configuration.
     *
     * @param string              $oldText
     * @param string              $newText
     * @param HtmlDiffConfig|null $config
     *
     * @return ListDiffLines
     */
    public static function create($oldText, $newText, HtmlDiffConfig $config = null)
    {
        $diff = new self($oldText, $newText);

        if (null !== $config) {
            $diff->setConfig($config);
        }

        return $diff;
    }

    /**
     * {@inheritDoc}
     *
     * @throws \InvalidArgumentException when either text cannot be parsed or holds no list element
     */
    public function build()
    {
        if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) {
            $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);

            return $this->content;
        }

        // NOTE(review): the cache is consulted above but the freshly computed
        // diff is never written back to it here - confirm whether that is intended.
        $matchStrategy = new ListItemMatchStrategy($this->config->getMatchThreshold());
        $this->lcsService = new LcsService($matchStrategy);

        // Keep $this->content in sync on the cache-miss path as well.
        $this->content = $this->listByLines($this->oldText, $this->newText);

        return $this->content;
    }

    /**
     * Parses both HTML fragments, locates the first list in each and renders the diffed list.
     *
     * @param string $old
     * @param string $new
     *
     * @return string
     *
     * @throws \InvalidArgumentException when either text cannot be parsed or holds no list element
     */
    protected function listByLines($old, $new)
    {
        /* @var $newDom \simple_html_dom */
        $newDom = HtmlDomParser::str_get_html($new);
        /* @var $oldDom \simple_html_dom */
        $oldDom = HtmlDomParser::str_get_html($old);

        // The parser may hand back a non-object (e.g. false) instead of a DOM;
        // fail with a clear message rather than a "member function on bool" error.
        if (!is_object($newDom) || !is_object($oldDom)) {
            throw new \InvalidArgumentException('Unable to parse the HTML given to ListDiffLines.');
        }

        $newListNode = $this->findListNode($newDom);
        $oldListNode = $this->findListNode($oldDom);

        // getListItemOperations() type-hints both nodes, so reject anything else up front.
        if (!($newListNode instanceof \simple_html_dom_node) || !($oldListNode instanceof \simple_html_dom_node)) {
            throw new \InvalidArgumentException(sprintf(
                'ListDiffLines requires a list element (%s) in both the old and the new text.',
                implode(', ', static::$listTypes)
            ));
        }

        $operations = $this->getListItemOperations($oldListNode, $newListNode);

        return $this->processOperations($operations, $oldListNode, $newListNode);
    }

    /**
     * Returns the first element matching any of the configured list tag names.
     *
     * @param \simple_html_dom|\simple_html_dom_node $dom
     *
     * @return \simple_html_dom_node[]|\simple_html_dom_node|null
     */
    protected function findListNode($dom)
    {
        return $dom->find(implode(', ', static::$listTypes), 0);
    }

    /**
     * Translates the LCS alignment of both lists into add / delete / change operations.
     *
     * Positions stored in the operations are 1-based item numbers
     * (processOperations() reads the children with "position - 1").
     *
     * @param \simple_html_dom_node $oldListNode
     * @param \simple_html_dom_node $newListNode
     *
     * @return array|Operation[]
     */
    protected function getListItemOperations(\simple_html_dom_node $oldListNode, \simple_html_dom_node $newListNode)
    {
        // Prepare arrays of list item content to use in LCS algorithm
        $oldListText = $this->getListTextArray($oldListNode);
        $newListText = $this->getListTextArray($newListNode);

        $lcsMatches = $this->lcsService->longestCommonSubsequence($oldListText, $newListText);

        $oldLength = count($oldListText);
        $newLength = count($newListText);

        $operations = [];
        $currentLineInOld = 0;
        $currentLineInNew = 0;
        // Sentinel match one past the end of both lists so that trailing
        // additions/deletions are flushed by the loop below.
        $lcsMatches[$oldLength + 1] = $newLength + 1;
        foreach ($lcsMatches as $matchInOld => $matchInNew) {
            // No matching line in new list
            if ($matchInNew === 0) {
                continue;
            }

            $nextLineInOld = $currentLineInOld + 1;
            $nextLineInNew = $currentLineInNew + 1;

            if ($matchInNew > $nextLineInNew && $matchInOld > $nextLineInOld) {
                // Change
                $operations[] = new Operation(
                    Operation::CHANGED,
                    $nextLineInOld,
                    $matchInOld - 1,
                    $nextLineInNew,
                    $matchInNew - 1
                );
            } elseif ($matchInNew > $nextLineInNew && $matchInOld === $nextLineInOld) {
                // Add items before this
                $operations[] = new Operation(
                    Operation::ADDED,
                    $currentLineInOld,
                    $currentLineInOld,
                    $nextLineInNew,
                    $matchInNew - 1
                );
            } elseif ($matchInNew === $nextLineInNew && $matchInOld > $nextLineInOld) {
                // Delete items before this
                $operations[] = new Operation(
                    Operation::DELETED,
                    $nextLineInOld,
                    $matchInOld - 1,
                    $currentLineInNew,
                    $currentLineInNew
                );
            }

            $currentLineInNew = $matchInNew;
            $currentLineInOld = $matchInOld;
        }

        return $operations;
    }

    /**
     * Collects the matching text of every direct child of the list, in document order.
     *
     * @param \simple_html_dom_node $listNode
     *
     * @return array
     */
    protected function getListTextArray($listNode)
    {
        $output = [];
        foreach ($listNode->children() as $listItem) {
            $output[] = $this->getRelevantNodeText($listItem);
        }

        return $output;
    }

    /**
     * Builds the text used to match one list item against another.
     *
     * Child nodes without children of their own are copied verbatim; child
     * nodes that do have children are only kept (recursively, re-wrapped in a
     * bare tag) when their tag name is listed in $listContentTags.
     *
     * @param \simple_html_dom_node $node
     *
     * @return string
     */
    protected function getRelevantNodeText(\simple_html_dom_node $node)
    {
        if (!$node->hasChildNodes()) {
            return $node->innertext();
        }

        $output = '';
        foreach ($node->nodes as $child) {
            /* @var $child \simple_html_dom_node */
            if (!$child->hasChildNodes()) {
                $output .= $child->outertext();
            } elseif (in_array($child->nodeName(), static::$listContentTags, true)) {
                $output .= sprintf('<%1$s>%2$s</%1$s>', $child->nodeName(), $this->getRelevantNodeText($child));
            }
        }

        return $output;
    }

    /**
     * Marks a list item as deleted: adds the "removed" class and wraps its content in <del>.
     *
     * @param \simple_html_dom_node $li
     *
     * @return string the item's outer HTML after modification
     */
    protected function deleteListItem($li)
    {
        $this->addClassToNode($li, self::CLASS_LIST_ITEM_DELETED);
        $li->innertext = sprintf('<del>%s</del>', $li->innertext);

        return $li->outertext;
    }

    /**
     * Marks a list item as inserted: adds the "new" (or "replacement") class and wraps its content in <ins>.
     *
     * @param \simple_html_dom_node $li
     * @param bool                  $replacement use the "replacement" class instead of the "added" one
     *
     * @return string the item's outer HTML after modification
     */
    protected function addListItem($li, $replacement = false)
    {
        $this->addClassToNode($li, $replacement ? self::CLASS_LIST_ITEM_CHANGED : self::CLASS_LIST_ITEM_ADDED);
        $li->innertext = sprintf('<ins>%s</ins>', $li->innertext);

        return $li->outertext;
    }

    /**
     * Replays the operations over the old list and renders the resulting list HTML.
     *
     * Items of the old list that sit between operations are kept in place and
     * diffed against their counterpart in the new list. The rendered items
     * replace the content of $newListNode, which also receives the "diff-list" class.
     *
     * @param Operation[]|array     $operations
     * @param \simple_html_dom_node $oldListNode
     * @param \simple_html_dom_node $newListNode
     *
     * @return string
     */
    protected function processOperations($operations, $oldListNode, $newListNode)
    {
        $output = '';

        // 0-based cursors: number of items already consumed from each list.
        $indexInOld = 0;
        $indexInNew = 0;
        $lastOperation = null;

        foreach ($operations as $operation) {
            $replaced = false;
            // Emit the items kept in place that precede this operation.
            while ($operation->startInOld > ($operation->action === Operation::ADDED ? $indexInOld : $indexInOld + 1)) {
                $li = $oldListNode->children($indexInOld);
                $matchingLi = null;
                if ($operation->startInNew > ($operation->action === Operation::DELETED ? $indexInNew
                        : $indexInNew + 1)
                ) {
                    $matchingLi = $newListNode->children($indexInNew);
                }

                // Only the first kept item following a deletion is flagged as a replacement.
                $class = self::CLASS_LIST_ITEM_NONE;
                if ($lastOperation === Operation::DELETED && !$replaced) {
                    $class = self::CLASS_LIST_ITEM_CHANGED;
                    $replaced = true;
                }

                $output .= $this->renderKeptListItem($li, $matchingLi, $class);
                if (null !== $matchingLi) {
                    $indexInNew++;
                }
                $indexInOld++;
            }

            switch ($operation->action) {
                case Operation::ADDED:
                    for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
                        $output .= $this->addListItem($newListNode->children($i - 1));
                    }
                    $indexInNew = $operation->endInNew;
                    break;

                case Operation::DELETED:
                    for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
                        $output .= $this->deleteListItem($oldListNode->children($i - 1));
                    }
                    $indexInOld = $operation->endInOld;
                    break;

                case Operation::CHANGED:
                    // Every deletion lowers the delta; an inserted item counts as a
                    // replacement for as long as deletions remain unmatched.
                    $changeDelta = 0;
                    for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
                        $output .= $this->deleteListItem($oldListNode->children($i - 1));
                        $changeDelta--;
                    }
                    for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
                        $output .= $this->addListItem($newListNode->children($i - 1), $changeDelta < 0);
                        $changeDelta++;
                    }
                    $indexInOld = $operation->endInOld;
                    $indexInNew = $operation->endInNew;
                    break;
            }

            $lastOperation = $operation->action;
        }

        // Emit whatever remains of the old list after the last operation.
        $oldCount = count($oldListNode->children());
        $newCount = count($newListNode->children());
        while ($indexInOld < $oldCount) {
            $li = $oldListNode->children($indexInOld);
            $matchingLi = null;
            if ($indexInNew < $newCount) {
                $matchingLi = $newListNode->children($indexInNew);
            }

            // NOTE(review): unlike the loop above there is no "first item only"
            // guard here, so every trailing item is flagged as a replacement when
            // the last operation was a deletion - confirm this is intended.
            $class = self::CLASS_LIST_ITEM_NONE;
            if ($lastOperation === Operation::DELETED) {
                $class = self::CLASS_LIST_ITEM_CHANGED;
            }

            $output .= $this->renderKeptListItem($li, $matchingLi, $class);
            if (null !== $matchingLi) {
                $indexInNew++;
            }
            $indexInOld++;
        }

        $newListNode->innertext = $output;
        $this->addClassToNode($newListNode, 'diff-list');

        return $newListNode->outertext;
    }

    /**
     * Renders an item of the old list that stays in place, diffing its content against its new counterpart if any.
     *
     * @param \simple_html_dom_node      $li         item taken from the old list
     * @param \simple_html_dom_node|null $matchingLi item at the same position in the new list, if any
     * @param string                     $class      CSS class to append to the item
     *
     * @return string the item's outer HTML after modification
     */
    private function renderKeptListItem($li, $matchingLi, $class)
    {
        if (null !== $matchingLi) {
            $htmlDiff = HtmlDiff::create($li->innertext, $matchingLi->innertext, $this->config);
            $li->innertext = $htmlDiff->build();
        }
        $this->addClassToNode($li, $class);

        return $li->outertext;
    }

    /**
     * Appends a CSS class to the node's existing "class" attribute.
     *
     * @param \simple_html_dom_node $node
     * @param string                $class
     */
    protected function addClassToNode(\simple_html_dom_node $node, $class)
    {
        $node->setAttribute('class', trim(sprintf('%s %s', $node->getAttribute('class'), $class)));
    }
}
349
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.