1
|
|
|
import copy |
2
|
|
|
import difflib |
3
|
|
|
|
4
|
|
|
from coalib.results.LineDiff import LineDiff, ConflictError |
5
|
|
|
from coalib.results.SourceRange import SourceRange |
6
|
|
|
from coala_utils.decorators import enforce_signature, generate_eq |
7
|
|
|
|
8
|
|
|
|
9
|
|
|
@generate_eq("_file", "modified", "rename", "delete")
class Diff:
    """
    A Diff result represents a difference for one file.

    Changes are stored per line number of the original file (counting from
    1), each as a ``LineDiff``. The key 0 is used for lines that are inserted
    before the first line of the file.
    """

    def __init__(self, file_list, filename=None, rename=False, delete=False):
        """
        Creates an empty diff for the given file.

        :param file_list: The original (unmodified) file as a list of its
                          lines.
        :param filename:  None or a string containing the name of the file
        :param rename:    False or str containing new name of file.
        :param delete:    True if file is set to be deleted.
        """
        self._changes = {}
        self._file = file_list
        self.filename = filename
        # These two go through the validating property setters below.
        self.rename = rename
        self.delete = delete

    @classmethod
    def from_string_arrays(cls, file_array_1, file_array_2, rename=False):
        """
        Creates a Diff object from two arrays containing strings.

        If this Diff is applied to the original array, the second array will be
        created.

        :param file_array_1: Original array
        :param file_array_2: Array to compare
        :param rename:       False or str containing new name of file.
        :return:             The Diff transforming the first array into the
                             second one.
        """
        result = cls(file_array_1, rename=rename)

        matcher = difflib.SequenceMatcher(None, file_array_1, file_array_2)
        # We use this because it's faster (generator) and doesn't yield as
        # much useless information as get_opcodes.
        for change_group in matcher.get_grouped_opcodes(1):
            for (tag,
                 a_index_1,
                 a_index_2,
                 b_index_1,
                 b_index_2) in change_group:
                # Opcode indices are 0-based and end-exclusive, our line
                # numbers are 1-based and end-inclusive. "equal" opcodes need
                # no handling.
                if tag == "delete":
                    result.delete_lines(a_index_1+1, a_index_2)
                elif tag == "insert":
                    # We add after line, they add before, so don't add 1
                    # here
                    result.add_lines(a_index_1,
                                     file_array_2[b_index_1:b_index_2])
                elif tag == "replace":
                    # The first replaced line becomes a change, the remaining
                    # new lines are appended after it and the remaining old
                    # lines are dropped.
                    result.change_line(a_index_1+1,
                                       file_array_1[a_index_1],
                                       file_array_2[b_index_1])
                    result.add_lines(a_index_1+1,
                                     file_array_2[b_index_1+1:b_index_2])
                    result.delete_lines(a_index_1+2, a_index_2)

        return result

    @classmethod
    def from_clang_fixit(cls, fixit, file):
        """
        Creates a Diff object from a given clang fixit and the file contents.

        :param fixit: A cindex.Fixit object.
        :param file:  A list of lines in the file to apply the fixit to.
        :raises AssertionError: ``file`` is neither a list nor a tuple.
        :return:      The corresponding Diff object.
        """
        assert isinstance(file, (list, tuple))

        start = fixit.range.start
        end = fixit.range.end

        # NOTE(review): the affected lines are joined with '\n'; if the lines
        # already carry their line endings this inserts additional newlines
        # for fixits spanning several lines - confirm against the callers.
        oldvalue = '\n'.join(file[start.line-1:end.line])

        # Number of characters of the last affected line that are located at
        # or behind the (1-based) end column and thus have to be kept.
        tail_length = len(file[end.line-1]) - (end.column - 1)
        # Use an explicit non-negative start index: a negative slice start
        # of "-tail_length" would wrongly select the whole string when the
        # fixit reaches exactly to the end of the line (tail_length == 0).
        tail_start = max(0, len(oldvalue) - tail_length)

        newvalue = (oldvalue[:start.column-1] +
                    fixit.value +
                    oldvalue[tail_start:])
        new_file = (file[:start.line-1] +
                    type(file)(newvalue.splitlines(True)) +
                    file[end.line:])

        return cls.from_string_arrays(file, new_file)

    def _get_change(self, line_nr, min_line=1):
        """
        Retrieves the LineDiff stored for the given line or a fresh, not yet
        stored one if the line has no changes so far.

        :param line_nr:     The line number to look up.
        :param min_line:    The smallest line number that is accepted.
        :raises TypeError:  ``line_nr`` is not an integer.
        :raises ValueError: ``line_nr`` is smaller than ``min_line``.
        :return:            The LineDiff for the line.
        """
        if not isinstance(line_nr, int):
            raise TypeError("line_nr needs to be an integer.")
        if line_nr < min_line:
            raise ValueError("The given line number is not allowed.")

        return self._changes.get(line_nr, LineDiff())

    def stats(self):
        """
        Returns tuple containing number of additions and deletions in the diff.
        """
        additions = 0
        deletions = 0
        for line_diff in self._changes.values():
            if line_diff.change:
                # A changed line counts as one removed and one added line.
                additions += 1
                deletions += 1
            elif line_diff.delete:
                deletions += 1
            if line_diff.add_after:
                additions += len(line_diff.add_after)
        return additions, deletions

    def __len__(self):
        """
        Returns total number of additions and deletions in diff.
        """
        return sum(self.stats())

    @property
    def rename(self):
        """
        :return: string containing new name of the file.
        """
        return self._rename

    @rename.setter
    @enforce_signature
    def rename(self, rename: (str, False)):
        """
        :param rename: False or string containing new name of file.
        """
        self._rename = rename

    @property
    def delete(self):
        """
        :return: True if file is set to be deleted.
        """
        return self._delete

    @delete.setter
    @enforce_signature
    def delete(self, delete: bool):
        """
        :param delete: True if file is set to be deleted, False otherwise.
        """
        self._delete = delete

    @property
    def original(self):
        """
        Retrieves the original file.
        """
        return self._file

    @property
    def modified(self):
        """
        Calculates the modified file, after applying the Diff to the original.

        :return: The modified file as a list of lines; an empty list if the
                 file is set to be deleted.
        """
        result = []

        if self.delete:
            return result

        # Number of original lines that were already handled.
        current_line = 0

        # Note that line_nr counts from _1_ although 0 is possible when
        # inserting lines before everything
        for line_nr in sorted(self._changes):
            # Copy the untouched lines in front of this change.
            result.extend(self._file[current_line:max(line_nr-1, 0)])
            linediff = self._changes[line_nr]
            if not linediff.delete and not linediff.change and line_nr > 0:
                # Only additions after this line, so the line itself stays.
                result.append(self._file[line_nr-1])
            elif linediff.change:
                result.append(linediff.change[1])

            if linediff.add_after:
                result.extend(linediff.add_after)

            current_line = line_nr

        result.extend(self._file[current_line:])

        return result

    @property
    def unified_diff(self):
        """
        Generates a unified diff corresponding to this patch.

        Note that the unified diff is not deterministic and thus not suitable
        for equality comparison.
        """
        return ''.join(difflib.unified_diff(
            self.original,
            self.modified,
            tofile=self.rename if isinstance(self.rename, str) else ''))

    def __json__(self):
        """
        Override JSON export, using the unified diff is the easiest thing for
        the users.
        """
        return self.unified_diff

    def affected_code(self, filename=None):
        """
        Creates a list of SourceRange objects which point to the related code.
        Changes on continuous lines will be put into one SourceRange.

        :param filename: The filename to associate the SourceRange's to.
        :raises AssertionError: Filename was not specified via parameter or
                                property
        :return:         A list of all related SourceRange objects.
        """
        assert filename or self.filename, "Filename not provided"

        filename = filename or self.filename

        return [diff.range(filename)
                for diff in self.split_diff(distance=0)]

    def split_diff(self, distance=1):
        """
        Splits this diff into small pieces, such that several continuously
        altered lines are still together in one diff. All subdiffs will be
        yielded. Every subdiff refers to the same original file, filename,
        renaming and deletion state as this diff.

        A diff like this with changes being together closely won't be split:

        >>> diff = Diff.from_string_arrays([     'b', 'c', 'e'],
        ...                                ['a', 'b', 'd', 'f'])
        >>> len(list(diff.split_diff()))
        1

        If we set the distance to 0, it will be split:

        >>> len(list(diff.split_diff(distance=0)))
        2

        If a negative distance is given, every change will be yielded as an own
        diff, even if they are right beneath each other:

        >>> len(list(diff.split_diff(distance=-1)))
        3

        If a file gets renamed or deleted only, it will be yielded as is:

        >>> len(list(Diff([], rename='test').split_diff()))
        1

        An empty diff will not yield any diffs:

        >>> len(list(Diff([]).split_diff()))
        0

        :param distance: Number of unchanged lines that are allowed in between
                         two changed lines so they get yielded as one diff.
        """
        if not self:
            return

        def new_subdiff():
            # The filename is handed over as well so that the subdiffs can be
            # used (e.g. for ``range()``) without supplying it again.
            return Diff(self._file,
                        filename=self.filename,
                        rename=self.rename,
                        delete=self.delete)

        last_line = -1
        this_diff = new_subdiff()
        for line in sorted(self._changes):
            # Start a new piece if the gap to the previous change is too big.
            if line > last_line + distance + 1 and this_diff._changes:
                yield this_diff
                this_diff = new_subdiff()

            last_line = line
            this_diff._changes[line] = self._changes[line]

        # If the diff contains no line changes, the loop above will not be run
        # else, this_diff will never be empty and thus this has to be yielded
        # always.
        yield this_diff

    def range(self, filename=None):
        """
        Calculates a SourceRange spanning over the whole Diff. If something is
        added after the 0th line (i.e. before the first line) the first line
        will be included in the SourceRange.

        The range of an empty diff will only affect the filename:

        >>> range = Diff([], filename="file").range()
        >>> range.file is None
        False
        >>> print(range.start.line)
        None

        In the interests of compatibility, you can still call this with a
        manually supplied filename as a parameter.

        >>> range = Diff([]).range("file")
        >>> range.file is None
        False
        >>> print(range.start.line)
        None

        :param filename: None or the filename to associate the
                         SourceRange with.
        :raises AssertionError: Filename was not specified via parameter or
                                property
        :return:         A SourceRange object.
        """
        assert filename or self.filename, "Filename not provided"

        filename = filename or self.filename

        if not self._changes:
            return SourceRange.from_values(filename)

        # Line 0 (additions before everything) is mapped to the first line.
        start = min(self._changes)
        end = max(self._changes)
        return SourceRange.from_values(filename,
                                       start_line=max(1, start),
                                       end_line=max(1, end))

    def __add__(self, other):
        """
        Adds another diff to this one. Will throw an exception if this is not
        possible. (This will *not* be done in place.)

        :param other:          The Diff to add.
        :raises TypeError:     ``other`` is not a Diff.
        :raises ConflictError: Both diffs rename the file differently, belong
                               to different files or contain changes that
                               cannot be combined.
        :return:               A new Diff containing the changes of both.
        """
        if not isinstance(other, Diff):
            raise TypeError("Only diffs can be added to a diff.")

        # Renamings only conflict if both diffs rename, and do so differently.
        if self.rename != other.rename and False not in (self.rename,
                                                         other.rename):
            raise ConflictError("Diffs contain conflicting renamings.")

        if self.filename and other.filename and self.filename != other.filename:
            raise ConflictError("Diffs are for different files")

        result = copy.deepcopy(self)
        result.rename = self.rename or other.rename
        result.delete = self.delete or other.delete

        for line_nr in other._changes:
            change = other._changes[line_nr]
            if change.delete is True:
                result.delete_line(line_nr)
            if change.add_after is not False:
                result.add_lines(line_nr, change.add_after)
            if change.change is not False:
                result.change_line(line_nr, change.change[0], change.change[1])

        return result

    def __bool__(self):
        """
        >>> bool(Diff([]))
        False
        >>> bool(Diff([], rename="some"))
        True
        >>> bool(Diff([], delete=True))
        True
        >>> bool(Diff.from_string_arrays(['1'], []))
        True

        :return: False if the patch has no effect at all when applied.
        """
        return (self.rename is not False or
                self.delete is True or
                len(self._changes) > 0)

    def delete_line(self, line_nr):
        """
        Mark the given line nr as deleted. The first line is line number 1.

        :param line_nr:     The number of the line to delete.
        :raises TypeError:  ``line_nr`` is not an integer.
        :raises ValueError: ``line_nr`` is smaller than 1.
        """
        linediff = self._get_change(line_nr)
        linediff.delete = True
        self._changes[line_nr] = linediff

    def delete_lines(self, line_nr_start, line_nr_end):
        """
        Delete lines in a specified range, inclusively.

        :param line_nr_start: The number of the first line to delete.
        :param line_nr_end:   The number of the last line to delete.
        """
        for line_nr in range(line_nr_start, line_nr_end + 1):
            self.delete_line(line_nr)

    def add_lines(self, line_nr_before, lines):
        """
        Adds lines after the given line number.

        :param line_nr_before: Line number of the line before the additions.
                               Use 0 for insert lines before everything.
        :param lines:          A list of lines to add.
        :raises TypeError:     ``line_nr_before`` is not an integer.
        :raises ValueError:    ``line_nr_before`` is negative.
        :raises ConflictError: There are already lines added after the given
                               line.
        """
        if lines == []:
            return  # No action

        linediff = self._get_change(line_nr_before, min_line=0)
        if linediff.add_after is not False:
            raise ConflictError("Cannot add lines after the given line since "
                                "there are already lines.")

        linediff.add_after = lines
        self._changes[line_nr_before] = linediff

    def change_line(self, line_nr, original_line, replacement):
        """
        Changes the given line with the given line number. The replacement will
        be there instead.

        :param line_nr:        The number of the line to change.
        :param original_line:  The original content of the line.
        :param replacement:    The new content of the line.
        :raises TypeError:     ``line_nr`` is not an integer.
        :raises ValueError:    ``line_nr`` is smaller than 1.
        :raises ConflictError: The line was already changed to something
                               different than ``replacement``.
        """
        linediff = self._get_change(line_nr)
        # Applying the very same replacement again is not a conflict.
        if linediff.change is not False and linediff.change[1] != replacement:
            raise ConflictError("An already changed line cannot be changed.")

        linediff.change = (original_line, replacement)
        self._changes[line_nr] = linediff
421
|
|
|
|