1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Caxy\HtmlDiff; |
4
|
|
|
|
5
|
|
|
use Sunra\PhpSimple\HtmlDomParser; |
6
|
|
|
|
7
|
|
|
/**
 * Diffs two HTML lists item by item.
 *
 * The items of the old and the new list are flattened to comparable text,
 * matched with a longest-common-subsequence pass that uses a fuzzy
 * comparator, and the result is rendered as a single list whose items carry
 * CSS classes describing what happened to them.
 */
class ListDiffLines extends ListDiff
{
    // CSS class values appended to the "class" attribute of rendered list items.
    const CLASS_LIST_ITEM_ADDED = 'normal new';
    const CLASS_LIST_ITEM_DELETED = 'removed';
    const CLASS_LIST_ITEM_CHANGED = 'replacement';
    const CLASS_LIST_ITEM_NONE = 'normal';

    // NOTE(review): $containerTags and $styleTags are not read anywhere in this
    // class; presumably they are consumed by a parent or sibling class - verify.
    protected static $containerTags = array('html', 'body', 'p', 'blockquote',
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
        'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
        'dt', 'dd', 'input', 'form', 'img', 'span', 'a');
    protected static $styleTags = array('i', 'b', 'strong', 'em', 'font',
        'big', 'del', 'tt', 'sub', 'sup', 'strike');

    /**
     * Tags whose content is kept (re-wrapped in a bare tag) when a list item
     * is flattened for matching; see getRelevantNodeText().
     *
     * @var string[]
     */
    protected static $listContentTags = array(
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'br', 'hr', 'code', 'input',
        'form', 'img', 'span', 'a', 'i', 'b', 'strong', 'em', 'font', 'big',
        'del', 'tt', 'sub', 'sup', 'strike',
    );

    /**
     * LCS implementation used to match list items; created in build().
     *
     * @var LcsService
     */
    protected $lcsService;

    /**
     * Creates a diff instance and applies the given configuration, if any.
     *
     * @param string              $oldText
     * @param string              $newText
     * @param HtmlDiffConfig|null $config
     *
     * @return ListDiffLines
     */
    public static function create($oldText, $newText, HtmlDiffConfig $config = null)
    {
        $diff = new self($oldText, $newText);

        if (null !== $config) {
            $diff->setConfig($config);
        }

        return $diff;
    }

    /**
     * Builds the diffed list markup for the old and new text.
     *
     * Sets up the LCS service with a comparator that decides whether two list
     * items are "the same item", then delegates to listByLines().
     *
     * @return string
     */
    public function build()
    {
        $threshold = $this->config->getMatchThreshold();

        // Two items match when any of the following holds:
        //  1. their tag-stripped text is at least $threshold percent similar,
        //  2. their raw markup is at least $threshold percent similar,
        //  3. they share a long enough common prefix/suffix.
        $comparator = function ($a, $b) use ($threshold) {
            $percentage = null;

            // Strip tags and check similarity
            $aStripped = strip_tags($a);
            $bStripped = strip_tags($b);
            similar_text($aStripped, $bStripped, $percentage);

            if ($percentage >= $threshold) {
                return true;
            }

            // Check w/o stripped tags
            similar_text($a, $b, $percentage);
            if ($percentage >= $threshold) {
                return true;
            }

            // Check common prefix/ suffix length
            $aCleaned = trim($aStripped);
            $bCleaned = trim($bStripped);
            if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
                // One side has no visible text: fall back to the raw markup.
                $aCleaned = $a;
                $bCleaned = $b;
            }
            if (strlen($aCleaned) === 0 || strlen($bCleaned) === 0) {
                // Returning here also keeps the divisions below away from zero.
                return false;
            }
            $prefixIndex = Preprocessor::diffCommonPrefix($aCleaned, $bCleaned);
            $suffixIndex = Preprocessor::diffCommonSuffix($aCleaned, $bCleaned);

            // Use shorter string, and see how much of it is leftover
            $len = min(strlen($aCleaned), strlen($bCleaned));
            $remaining = $len - ($prefixIndex + $suffixIndex);
            $strLengthPercent = $len / max(strlen($a), strlen($b));

            if ($remaining === 0 && $strLengthPercent > 0.1) {
                return true;
            }

            $percentRemaining = $remaining / $len;

            if ($strLengthPercent > 0.1 && $percentRemaining < 0.4) {
                return true;
            }

            return false;
        };
        $this->lcsService = new LcsService($comparator);

        return $this->listByLines($this->oldText, $this->newText);
    }

    /**
     * Finds the first element matching any of the tags in static::$listTypes.
     *
     * @param \simple_html_dom|\simple_html_dom_node $dom
     *
     * @return \simple_html_dom_node[]|\simple_html_dom_node|null
     */
    protected function findListNode($dom)
    {
        return $dom->find(implode(', ', static::$listTypes), 0);
    }

    /**
     * Parses both inputs, locates their list nodes and renders the diff.
     *
     * NOTE(review): there is no guard here for a failed parse or for markup
     * that contains no list node - confirm callers always pass list markup.
     *
     * @param string $old
     * @param string $new
     *
     * @return string
     */
    protected function listByLines($old, $new)
    {
        /* @var $newDom \simple_html_dom */
        $newDom = HtmlDomParser::str_get_html($new);
        /* @var $oldDom \simple_html_dom */
        $oldDom = HtmlDomParser::str_get_html($old);

        $newListNode = $this->findListNode($newDom);
        $oldListNode = $this->findListNode($oldDom);

        $operations = $this->getListItemOperations($oldListNode, $newListNode);

        return $this->processOperations($operations, $oldListNode, $newListNode);
    }

    /**
     * Turns the LCS match table of the two lists into add/delete/change
     * operations covering the gaps between matched items.
     *
     * @param \simple_html_dom_node $oldListNode
     * @param \simple_html_dom_node $newListNode
     *
     * @return array|Operation[]
     */
    protected function getListItemOperations($oldListNode, $newListNode)
    {
        // Prepare arrays of list item content to use in LCS algorithm
        $oldListText = $this->getListTextArray($oldListNode);
        $newListText = $this->getListTextArray($newListNode);

        // $matches[$oldLine] is the matched line in the new list; 0 is treated
        // as "no match" below. Line numbers look 1-based (processOperations
        // subtracts 1 before indexing children) - TODO confirm with LcsService.
        $matches = $this->lcsService->longestCommonSubsequence($oldListText, $newListText);

        $m = count($oldListText);
        $n = count($newListText);

        $operations = [];
        $lineInOld = 0;
        $lineInNew = 0;
        // Sentinel match just past both lists so trailing gaps are emitted too.
        $matches[$m + 1] = $n + 1;
        foreach ($matches as $i => $match) {
            if ($match !== 0) {
                if ($match > ($lineInNew + 1) && $i === ($lineInOld + 1)) {
                    // Add items before this
                    $operations[] = new Operation(Operation::ADDED, $lineInOld, $lineInOld, $lineInNew + 1, $match - 1);
                } elseif ($i > ($lineInOld + 1) && $match === ($lineInNew + 1)) {
                    // Delete items before this
                    $operations[] = new Operation(Operation::DELETED, $lineInOld + 1, $i - 1, $lineInNew, $lineInNew);
                } elseif ($match !== ($lineInNew + 1) && $i !== ($lineInOld + 1)) {
                    // Change
                    $operations[] = new Operation(Operation::CHANGED, $lineInOld + 1, $i - 1, $lineInNew + 1, $match - 1);
                }

                $lineInNew = $match;
                $lineInOld = $i;
            }
        }

        return $operations;
    }

    /**
     * Collects the comparable text of every child of the given list node.
     *
     * @param \simple_html_dom_node $listNode
     *
     * @return string[]
     */
    protected function getListTextArray($listNode)
    {
        $output = array();
        foreach ($listNode->children() as $listItem) {
            $output[] = $this->getRelevantNodeText($listItem);
        }

        return $output;
    }

    /**
     * Flattens a node to the markup used for matching list items.
     *
     * Nodes without child nodes are kept verbatim. A child that has child
     * nodes of its own is kept only when its tag is in $listContentTags, and
     * is then re-wrapped in a bare tag (attributes are not copied); any other
     * such child - a nested list, for example - is left out.
     *
     * @param \simple_html_dom_node $node
     *
     * @return string
     */
    protected function getRelevantNodeText(\simple_html_dom_node $node)
    {
        if (!$node->hasChildNodes()) {
            return $node->innertext();
        }

        $output = '';
        foreach ($node->nodes as $child) {
            /* @var $child \simple_html_dom_node */
            if (!$child->hasChildNodes()) {
                $output .= $child->outertext();
            } elseif (in_array($child->nodeName(), static::$listContentTags, true)) {
                $output .= sprintf('<%1$s>%2$s</%1$s>', $child->nodeName(), $this->getRelevantNodeText($child));
            }
        }

        return $output;
    }

    /**
     * Marks a list item as deleted: adds the "removed" class and wraps its
     * content in <del>.
     *
     * @param \simple_html_dom_node $li
     *
     * @return string
     */
    protected function deleteListItem($li)
    {
        $li->setAttribute('class', trim($li->getAttribute('class').' '.self::CLASS_LIST_ITEM_DELETED));
        $li->innertext = sprintf('<del>%s</del>', $li->innertext);

        return $li->outertext;
    }

    /**
     * Marks a list item as added: adds the "normal new" class (or
     * "replacement" when it replaces a deleted item) and wraps its content
     * in <ins>.
     *
     * @param \simple_html_dom_node $li
     * @param bool                  $replacement
     *
     * @return string
     */
    protected function addListItem($li, $replacement = false)
    {
        $li->setAttribute('class', trim($li->getAttribute('class').' '.($replacement ? self::CLASS_LIST_ITEM_CHANGED : self::CLASS_LIST_ITEM_ADDED)));
        $li->innertext = sprintf('<ins>%s</ins>', $li->innertext);

        return $li->outertext;
    }

    /**
     * Renders the merged list from the operations and the two list nodes.
     *
     * Items between operations are matched items: they are diffed against
     * their counterpart in the new list and emitted with the "normal" class.
     * The first matched item after a DELETED operation is emitted with the
     * "replacement" class instead. The result is written into $newListNode,
     * which also receives the "diff-list" class.
     *
     * @param Operation[]           $operations
     * @param \simple_html_dom_node $oldListNode
     * @param \simple_html_dom_node $newListNode
     *
     * @return string
     */
    protected function processOperations($operations, $oldListNode, $newListNode)
    {
        $output = '';

        $indexInOld = 0;
        $indexInNew = 0;
        $lastOperation = null;

        foreach ($operations as $operation) {
            $replaced = false;

            // Emit the matched items that precede this operation. ADDED
            // operations store the old line *before* the insertion point and
            // DELETED operations the new line before the deletion (see
            // getListItemOperations), hence the differing offsets.
            while ($operation->startInOld > ($operation->action === Operation::ADDED ? $indexInOld : $indexInOld + 1)) {
                $li = $oldListNode->children($indexInOld);
                $matchingLi = null;
                if ($operation->startInNew > ($operation->action === Operation::DELETED ? $indexInNew
                        : $indexInNew + 1)
                ) {
                    $matchingLi = $newListNode->children($indexInNew);
                }
                if (null !== $matchingLi) {
                    $indexInNew++;
                }

                $class = self::CLASS_LIST_ITEM_NONE;
                if ($lastOperation === Operation::DELETED && !$replaced) {
                    $class = self::CLASS_LIST_ITEM_CHANGED;
                    $replaced = true;
                }

                $output .= $this->renderMatchedListItem($li, $matchingLi, $class);
                $indexInOld++;
            }

            switch ($operation->action) {
                case Operation::ADDED:
                    for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
                        $output .= $this->addListItem($newListNode->children($i - 1));
                    }
                    $indexInNew = $operation->endInNew;
                    break;

                case Operation::DELETED:
                    for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
                        $output .= $this->deleteListItem($oldListNode->children($i - 1));
                    }
                    $indexInOld = $operation->endInOld;
                    break;

                case Operation::CHANGED:
                    // $changeDelta goes negative by the number of deleted
                    // items, so that many of the added items are flagged as
                    // replacements.
                    $changeDelta = 0;
                    for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
                        $output .= $this->deleteListItem($oldListNode->children($i - 1));
                        $changeDelta--;
                    }
                    for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
                        $output .= $this->addListItem($newListNode->children($i - 1), $changeDelta < 0);
                        $changeDelta++;
                    }
                    $indexInOld = $operation->endInOld;
                    $indexInNew = $operation->endInNew;
                    break;
            }

            $lastOperation = $operation->action;
        }

        // Emit the matched items that follow the last operation. As in the
        // loop above, only the first item after a DELETED operation is marked
        // as a replacement; previously $replaced was reset here but never
        // consulted, so every trailing item received the class.
        $replaced = false;
        $oldCount = count($oldListNode->children());
        $newCount = count($newListNode->children());
        while ($indexInOld < $oldCount) {
            $li = $oldListNode->children($indexInOld);
            $matchingLi = null;
            if ($indexInNew < $newCount) {
                $matchingLi = $newListNode->children($indexInNew);
            }
            if (null !== $matchingLi) {
                $indexInNew++;
            }

            $class = self::CLASS_LIST_ITEM_NONE;
            if ($lastOperation === Operation::DELETED && !$replaced) {
                $class = self::CLASS_LIST_ITEM_CHANGED;
                $replaced = true;
            }

            $output .= $this->renderMatchedListItem($li, $matchingLi, $class);
            $indexInOld++;
        }

        $newListNode->innertext = $output;
        $newListNode->setAttribute('class', trim($newListNode->getAttribute('class').' diff-list'));

        return $newListNode->outertext;
    }

    /**
     * Renders a matched (kept) list item: diffs its content against its
     * counterpart from the new list when there is one, then appends $class.
     *
     * @param \simple_html_dom_node      $li         item from the old list
     * @param \simple_html_dom_node|null $matchingLi counterpart from the new list, if any
     * @param string                     $class      CSS class to append
     *
     * @return string
     */
    private function renderMatchedListItem($li, $matchingLi, $class)
    {
        if (null !== $matchingLi) {
            $htmlDiff = HtmlDiff::create($li->innertext, $matchingLi->innertext, $this->config);
            $li->innertext = $htmlDiff->build();
        }
        $li->setAttribute('class', trim($li->getAttribute('class').' '.$class));

        return $li->outertext;
    }
}
334
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.