Completed
Pull Request — master (#2132)
by Udayan
01:51
created

Diff.__add__()   D

Complexity

Conditions 8

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 8
c 2
b 0
f 0
dl 0
loc 26
rs 4
1
import copy
2
import difflib
3
4
from coalib.results.LineDiff import LineDiff, ConflictError
5
from coalib.results.SourceRange import SourceRange
6
from coalib.misc.Decorators import enforce_signature, generate_eq
7
8
9
@generate_eq("_file", "modified", "rename", "delete")
10
class Diff:
11
    """
12
    A Diff result represents a difference for one file.
13
    """
14
15
    def __init__(self, file_list, rename=False, delete=False):
16
        """
17
        Creates an empty diff for the given file.
18
19
        :param file_list: The original (unmodified) file as a list of its
20
                          lines.
21
        :param rename:    False or str containing new name of file.
22
        :param delete:    True if file is set to be deleted.
23
        """
24
        self._changes = {}
25
        self._file = file_list
26
        self.rename = rename
27
        self.delete = delete
28
29
    @classmethod
30
    def from_string_arrays(cls, file_array_1, file_array_2, rename=False):
31
        """
32
        Creates a Diff object from two arrays containing strings.
33
34
        If this Diff is applied to the original array, the second array will be
35
        created.
36
37
        :param file_array_1: Original array
38
        :param file_array_2: Array to compare
39
        :param rename:       False or str containing new name of file.
40
        """
41
        result = cls(file_array_1, rename=rename)
42
43
        matcher = difflib.SequenceMatcher(None, file_array_1, file_array_2)
44
        # We use this because its faster (generator) and doesnt yield as much
45
        # useless information as get_opcodes.
46
        for change_group in matcher.get_grouped_opcodes(1):
47
            for (tag,
48
                 a_index_1,
49
                 a_index_2,
50
                 b_index_1,
51
                 b_index_2) in change_group:
52
                if tag == "delete":
53
                    for index in range(a_index_1+1, a_index_2+1):
54
                        result.delete_line(index)
55
                elif tag == "insert":
56
                    # We add after line, they add before, so dont add 1 here
57
                    result.add_lines(a_index_1,
58
                                     file_array_2[b_index_1:b_index_2])
59
                elif tag == "replace":
60
                    result.change_line(a_index_1+1,
61
                                       file_array_1[a_index_1],
62
                                       file_array_2[b_index_1])
63
                    result.add_lines(a_index_1+1,
64
                                     file_array_2[b_index_1+1:b_index_2])
65
                    for index in range(a_index_1+2, a_index_2+1):
66
                        result.delete_line(index)
67
68
        return result
69
70
    @classmethod
71
    def from_clang_fixit(cls, fixit, file):
72
        """
73
        Creates a Diff object from a given clang fixit and the file contents.
74
75
        :param fixit: A cindex.Fixit object.
76
        :param file:  A list of lines in the file to apply the fixit to.
77
        :return:      The corresponding Diff object.
78
        """
79
        assert isinstance(file, (list, tuple))
80
81
        oldvalue = '\n'.join(file[fixit.range.start.line-1:
82
                                  fixit.range.end.line])
83
        endindex = fixit.range.end.column - len(file[fixit.range.end.line-1])-1
84
85
        newvalue = (oldvalue[:fixit.range.start.column-1] +
86
                    fixit.value +
87
                    oldvalue[endindex:])
88
        new_file = (file[:fixit.range.start.line-1] +
89
                    type(file)(newvalue.splitlines(True)) +
90
                    file[fixit.range.end.line:])
91
92
        return cls.from_string_arrays(file, new_file)
93
94
    def _get_change(self, line_nr, min_line=1):
95
        if not isinstance(line_nr, int):
96
            raise TypeError("line_nr needs to be an integer.")
97
        if line_nr < min_line:
98
            raise ValueError("The given line number is not allowed.")
99
100
        return self._changes.get(line_nr, LineDiff())
101
102
    def stats(self):
103
        """
104
        Returns tuple containing number of additions and deletions in the diff.
105
        """
106
        additions = 0
107
        deletions = 0
108
        for line_diff in self._changes.values():
109
            if line_diff.change:
110
                additions += 1
111
                deletions += 1
112
            elif line_diff.delete:
113
                deletions += 1
114
            if line_diff.add_after:
115
                additions += len(line_diff.add_after)
116
        return additions, deletions
117
118
    def __len__(self):
119
        """
120
        Returns total number of additions and deletions in diff.
121
        """
122
        return sum(self.stats())
123
124
    @property
125
    def rename(self):
126
        """
127
        :return: string containing new name of the file.
128
        """
129
        return self._rename
130
131
    @rename.setter
132
    @enforce_signature
133
    def rename(self, rename: (str, False)):
134
        """
135
        :param rename: False or string containing new name of file.
136
        """
137
        self._rename = rename
138
139
    @property
140
    def delete(self):
141
        """
142
        :return: True if file is set to be deleted.
143
        """
144
        return self._delete
145
146
    @delete.setter
147
    @enforce_signature
148
    def delete(self, delete: bool):
149
        """
150
        :param delete: True if file is set to be deleted, False otherwise.
151
        """
152
        self._delete = delete
153
154
    @property
155
    def original(self):
156
        """
157
        Retrieves the original file.
158
        """
159
        return self._file
160
161
    @property
162
    def modified(self):
163
        """
164
        Calculates the modified file, after applying the Diff to the original.
165
        """
166
        result = []
167
168
        if self.delete:
169
            return result
170
171
        current_line = 0
172
173
        # Note that line_nr counts from _1_ although 0 is possible when
174
        # inserting lines before everything
175
        for line_nr in sorted(self._changes):
176
            result.extend(self._file[current_line:max(line_nr-1, 0)])
177
            linediff = self._changes[line_nr]
178
            if not linediff.delete and not linediff.change and line_nr > 0:
179
                result.append(self._file[line_nr-1])
180
            elif linediff.change:
181
                result.append(linediff.change[1])
182
183
            if linediff.add_after:
184
                result.extend(linediff.add_after)
185
186
            current_line = line_nr
187
188
        result.extend(self._file[current_line:])
189
190
        return result
191
192
    @property
193
    def unified_diff(self):
194
        """
195
        Generates a unified diff corresponding to this patch.
196
197
        Note that the unified diff is not deterministic and thus not suitable
198
        for equality comparison.
199
        """
200
        return ''.join(difflib.unified_diff(
201
            self.original,
202
            self.modified,
203
            tofile=self.rename if isinstance(self.rename, str) else ''))
204
205
    def __json__(self):
206
        """
207
        Override JSON export, using the unified diff is the easiest thing for
208
        the users.
209
        """
210
        return self.unified_diff
211
212
    def affected_code(self, filename):
213
        """
214
        Creates a list of SourceRange objects which point to the related code.
215
        Changes on continuous lines will be put into one SourceRange.
216
217
        :param filename: The filename to associate the SourceRange's to.
218
        :return:         A list of all related SourceRange objects.
219
        """
220
        return list(diff.range(filename)
221
                    for diff in self.split_diff(distance=0))
222
223
    def split_diff(self, distance=1):
224
        """
225
        Splits this diff into small pieces, such that several continuously
226
        altered lines are still together in one diff. All subdiffs will be
227
        yielded.
228
229
        A diff like this with changes being together closely won't be splitted:
230
231
        >>> diff = Diff.from_string_arrays([     'b', 'c', 'e'],
232
        ...                                ['a', 'b', 'd', 'f'])
233
        >>> len(list(diff.split_diff()))
234
        1
235
236
        If we set the distance to 0, it will be splitted:
237
238
        >>> len(list(diff.split_diff(distance=0)))
239
        2
240
241
        If a negative distance is given, every change will be yielded as an own
242
        diff, even if they are right beneath each other:
243
244
        >>> len(list(diff.split_diff(distance=-1)))
245
        3
246
247
        :param distance: Number of unchanged lines that are allowed in between
248
                         two changed lines so they get yielded as one diff.
249
        """
250
        last_line = -1
251
        this_diff = Diff(self._file, rename=self.rename, delete=self.delete)
252
        for line in sorted(self._changes.keys()):
253
            if line > last_line + distance + 1 and len(this_diff._changes) > 0:
254
                yield this_diff
255
                this_diff = Diff(self._file, rename=self.rename,
256
                                 delete=self.delete)
257
258
            last_line = line
259
            this_diff._changes[line] = self._changes[line]
260
261
        if len(this_diff._changes) > 0:
262
            yield this_diff
263
264
    def range(self, filename):
265
        """
266
        Calculates a SourceRange spanning over the whole Diff. If something is
267
        added after the 0th line (i.e. before the first line) the first line
268
        will be included in the SourceRange.
269
270
        :param filename: The filename to associate the SourceRange with.
271
        :return:         A SourceRange object.
272
        """
273
        start = min(self._changes.keys())
274
        end = max(self._changes.keys())
275
        return SourceRange.from_values(filename,
276
                                       start_line=max(1, start),
277
                                       end_line=max(1, end))
278
279
    def __add__(self, other):
280
        """
281
        Adds another diff to this one. Will throw an exception if this is not
282
        possible. (This will *not* be done in place.)
283
        """
284
        if not isinstance(other, Diff):
285
            raise TypeError("Only diffs can be added to a diff.")
286
287
        if self.rename != other.rename and False not in (self.rename,
288
                                                         other.rename):
289
            raise ConflictError("Diffs contain conflicting renamings.")
290
291
        result = copy.deepcopy(self)
292
        result.rename = self.rename or other.rename
293
        result.delete = self.delete or other.delete
294
295
        for line_nr in other._changes:
296
            change = other._changes[line_nr]
297
            if change.delete is True:
298
                result.delete_line(line_nr)
299
            if change.add_after is not False:
300
                result.add_lines(line_nr, change.add_after)
301
            if change.change is not False:
302
                result.change_line(line_nr, change.change[0], change.change[1])
303
304
        return result
305
306
    def delete_line(self, line_nr):
307
        """
308
        Mark the given line nr as deleted. The first line is line number 1.
309
        """
310
        linediff = self._get_change(line_nr)
311
        linediff.delete = True
312
        self._changes[line_nr] = linediff
313
314
    def add_lines(self, line_nr_before, lines):
315
        """
316
        Adds lines after the given line number.
317
318
        :param line_nr_before: Line number of the line before the additions.
319
                               Use 0 for insert lines before everything.
320
        :param lines:          A list of lines to add.
321
        """
322
        if lines == []:
323
            return  # No action
324
325
        linediff = self._get_change(line_nr_before, min_line=0)
326
        if linediff.add_after is not False:
327
            raise ConflictError("Cannot add lines after the given line since "
328
                                "there are already lines.")
329
330
        linediff.add_after = lines
331
        self._changes[line_nr_before] = linediff
332
333
    def change_line(self, line_nr, original_line, replacement):
334
        """
335
        Changes the given line with the given line number. The replacement will
336
        be there instead.
337
        """
338
        linediff = self._get_change(line_nr)
339
        if linediff.change is not False:
340
            raise ConflictError("An already changed line cannot be changed.")
341
342
        linediff.change = (original_line, replacement)
343
        self._changes[line_nr] = linediff
344