Completed
Pull Request — master (#2655)
by
unknown
02:31
created

Diff.__add__()   F

Complexity

Conditions 11

Size

Total Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
dl 0
loc 29
rs 3.1764
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like Diff.__add__() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import copy
2
import difflib
3
4
from coalib.results.LineDiff import LineDiff, ConflictError
5
from coalib.results.SourceRange import SourceRange
6
from coala_utils.decorators import enforce_signature, generate_eq
7
8
9
@generate_eq("_file", "modified", "rename", "delete")
10
class Diff:
11
    """
12
    A Diff result represents a difference for one file.
13
    """
14
15
    def __init__(self, file_list, filename=None, rename=False, delete=False):
16
        """
17
        Creates an empty diff for the given file.
18
19
        :param file_list: The original (unmodified) file as a list of its
20
                          lines.
21
22
        :param filename:  None or a string containing the name of the file
23
        :param rename:    False or str containing new name of file.
24
        :param delete:    True if file is set to be deleted.
25
        """
26
        self._changes = {}
27
        self._file = file_list
28
        self.filename = filename
29
        self.rename = rename
30
        self.delete = delete
31
32
    @classmethod
33
    def from_string_arrays(cls, file_array_1, file_array_2, rename=False):
34
        """
35
        Creates a Diff object from two arrays containing strings.
36
37
        If this Diff is applied to the original array, the second array will be
38
        created.
39
40
        :param file_array_1: Original array
41
        :param file_array_2: Array to compare
42
        :param rename:       False or str containing new name of file.
43
        """
44
        result = cls(file_array_1, rename=rename)
45
46
        matcher = difflib.SequenceMatcher(None, file_array_1, file_array_2)
47
        # We use this because its faster (generator) and doesn't yield as much
48
        # useless information as get_opcodes.
49
        for change_group in matcher.get_grouped_opcodes(1):
50
            for (tag,
51
                 a_index_1,
52
                 a_index_2,
53
                 b_index_1,
54
                 b_index_2) in change_group:
55
                if tag == "delete":
56
                    for index in range(a_index_1+1, a_index_2+1):
57
                        result.delete_line(index)
58
                elif tag == "insert":
59
                    # We add after line, they add before, so dont add 1 here
60
                    result.add_lines(a_index_1,
61
                                     file_array_2[b_index_1:b_index_2])
62
                elif tag == "replace":
63
                    result.change_line(a_index_1+1,
64
                                       file_array_1[a_index_1],
65
                                       file_array_2[b_index_1])
66
                    result.add_lines(a_index_1+1,
67
                                     file_array_2[b_index_1+1:b_index_2])
68
                    for index in range(a_index_1+2, a_index_2+1):
69
                        result.delete_line(index)
70
71
        return result
72
73
    @classmethod
74
    def from_clang_fixit(cls, fixit, file):
75
        """
76
        Creates a Diff object from a given clang fixit and the file contents.
77
78
        :param fixit: A cindex.Fixit object.
79
        :param file:  A list of lines in the file to apply the fixit to.
80
        :return:      The corresponding Diff object.
81
        """
82
        assert isinstance(file, (list, tuple))
83
84
        oldvalue = '\n'.join(file[fixit.range.start.line-1:
85
                                  fixit.range.end.line])
86
        endindex = fixit.range.end.column - len(file[fixit.range.end.line-1])-1
87
88
        newvalue = (oldvalue[:fixit.range.start.column-1] +
89
                    fixit.value +
90
                    oldvalue[endindex:])
91
        new_file = (file[:fixit.range.start.line-1] +
92
                    type(file)(newvalue.splitlines(True)) +
93
                    file[fixit.range.end.line:])
94
95
        return cls.from_string_arrays(file, new_file)
96
97
    def _get_change(self, line_nr, min_line=1):
98
        if not isinstance(line_nr, int):
99
            raise TypeError("line_nr needs to be an integer.")
100
        if line_nr < min_line:
101
            raise ValueError("The given line number is not allowed.")
102
103
        return self._changes.get(line_nr, LineDiff())
104
105
    def stats(self):
106
        """
107
        Returns tuple containing number of additions and deletions in the diff.
108
        """
109
        additions = 0
110
        deletions = 0
111
        for line_diff in self._changes.values():
112
            if line_diff.change:
113
                additions += 1
114
                deletions += 1
115
            elif line_diff.delete:
116
                deletions += 1
117
            if line_diff.add_after:
118
                additions += len(line_diff.add_after)
119
        return additions, deletions
120
121
    def __len__(self):
122
        """
123
        Returns total number of additions and deletions in diff.
124
        """
125
        return sum(self.stats())
126
127
    @property
128
    def rename(self):
129
        """
130
        :return: string containing new name of the file.
131
        """
132
        return self._rename
133
134
    @rename.setter
135
    @enforce_signature
136
    def rename(self, rename: (str, False)):
137
        """
138
        :param rename: False or string containing new name of file.
139
        """
140
        self._rename = rename
141
142
    @property
143
    def delete(self):
144
        """
145
        :return: True if file is set to be deleted.
146
        """
147
        return self._delete
148
149
    @delete.setter
150
    @enforce_signature
151
    def delete(self, delete: bool):
152
        """
153
        :param delete: True if file is set to be deleted, False otherwise.
154
        """
155
        self._delete = delete
156
157
    @property
158
    def original(self):
159
        """
160
        Retrieves the original file.
161
        """
162
        return self._file
163
164
    @property
165
    def modified(self):
166
        """
167
        Calculates the modified file, after applying the Diff to the original.
168
        """
169
        result = []
170
171
        if self.delete:
172
            return result
173
174
        current_line = 0
175
176
        # Note that line_nr counts from _1_ although 0 is possible when
177
        # inserting lines before everything
178
        for line_nr in sorted(self._changes):
179
            result.extend(self._file[current_line:max(line_nr-1, 0)])
180
            linediff = self._changes[line_nr]
181
            if not linediff.delete and not linediff.change and line_nr > 0:
182
                result.append(self._file[line_nr-1])
183
            elif linediff.change:
184
                result.append(linediff.change[1])
185
186
            if linediff.add_after:
187
                result.extend(linediff.add_after)
188
189
            current_line = line_nr
190
191
        result.extend(self._file[current_line:])
192
193
        return result
194
195
    @property
196
    def unified_diff(self):
197
        """
198
        Generates a unified diff corresponding to this patch.
199
200
        Note that the unified diff is not deterministic and thus not suitable
201
        for equality comparison.
202
        """
203
        return ''.join(difflib.unified_diff(
204
            self.original,
205
            self.modified,
206
            tofile=self.rename if isinstance(self.rename, str) else ''))
207
208
    def __json__(self):
209
        """
210
        Override JSON export, using the unified diff is the easiest thing for
211
        the users.
212
        """
213
        return self.unified_diff
214
215
    def affected_code(self, filename=None):
216
        """
217
        Creates a list of SourceRange objects which point to the related code.
218
        Changes on continuous lines will be put into one SourceRange.
219
220
        :param filename: The filename to associate the SourceRange's to.
221
        :return:         A list of all related SourceRange objects.
222
        """
223
        assert filename or self.filename
224
225
        filename = filename or self.filename
226
227
        return list(diff.range(filename)
228
                    for diff in self.split_diff(distance=0))
229
230
    def split_diff(self, distance=1):
231
        """
232
        Splits this diff into small pieces, such that several continuously
233
        altered lines are still together in one diff. All subdiffs will be
234
        yielded.
235
236
        A diff like this with changes being together closely won't be splitted:
237
238
        >>> diff = Diff.from_string_arrays([     'b', 'c', 'e'],
239
        ...                                ['a', 'b', 'd', 'f'])
240
        >>> len(list(diff.split_diff()))
241
        1
242
243
        If we set the distance to 0, it will be splitted:
244
245
        >>> len(list(diff.split_diff(distance=0)))
246
        2
247
248
        If a negative distance is given, every change will be yielded as an own
249
        diff, even if they are right beneath each other:
250
251
        >>> len(list(diff.split_diff(distance=-1)))
252
        3
253
254
        If a file gets renamed or deleted only, it will be yielded as is:
255
256
        >>> len(list(Diff([], rename='test').split_diff()))
257
        1
258
259
        An empty diff will not yield any diffs:
260
261
        >>> len(list(Diff([]).split_diff()))
262
        0
263
264
        :param distance: Number of unchanged lines that are allowed in between
265
                         two changed lines so they get yielded as one diff.
266
        """
267
        if not self:
268
            return
269
270
        last_line = -1
271
        this_diff = Diff(self._file, rename=self.rename, delete=self.delete)
272
        for line in sorted(self._changes.keys()):
273
            if line > last_line + distance + 1 and len(this_diff._changes) > 0:
274
                yield this_diff
275
                this_diff = Diff(self._file, rename=self.rename,
276
                                 delete=self.delete)
277
278
            last_line = line
279
            this_diff._changes[line] = self._changes[line]
280
281
        # If the diff contains no line changes, the loop above will not be run
282
        # else, this_diff will never be empty and thus this has to be yielded
283
        # always.
284
        yield this_diff
285
286
    def range(self, filename=None):
287
        """
288
        Calculates a SourceRange spanning over the whole Diff. If something is
289
        added after the 0th line (i.e. before the first line) the first line
290
        will be included in the SourceRange.
291
292
        The range of an empty diff will only affect the filename:
293
294
        >>> range = Diff([]).range("file")
295
        >>> range.file is None
296
        False
297
        >>> print(range.start.line)
298
        None
299
300
        >>> range = Diff([], filename="file").range()
301
        >>> range.file is None
302
        False
303
        >>> print(range.start.line)
304
        None
305
306
        :param filename: None or the filename to associate the SourceRange with.
307
        :return:         A SourceRange object.
308
        """
309
        assert filename or self.filename
310
311
        filename = filename or self.filename
312
313
        if len(self._changes) == 0:
314
            return SourceRange.from_values(filename)
315
316
        start = min(self._changes.keys())
317
        end = max(self._changes.keys())
318
        return SourceRange.from_values(filename,
319
                                       start_line=max(1, start),
320
                                       end_line=max(1, end))
321
322
    def __add__(self, other):
323
        """
324
        Adds another diff to this one. Will throw an exception if this is not
325
        possible. (This will *not* be done in place.)
326
        """
327
        if not isinstance(other, Diff):
328
            raise TypeError("Only diffs can be added to a diff.")
329
330
        if self.rename != other.rename and False not in (self.rename,
331
                                                         other.rename):
332
            raise ConflictError("Diffs contain conflicting renamings.")
333
334
        if self.filename and other.filename and self.filename != other.filename:
335
            raise ConflictError("Diffs are for different files")
336
337
        result = copy.deepcopy(self)
338
        result.rename = self.rename or other.rename
339
        result.delete = self.delete or other.delete
340
341
        for line_nr in other._changes:
342
            change = other._changes[line_nr]
343
            if change.delete is True:
344
                result.delete_line(line_nr)
345
            if change.add_after is not False:
346
                result.add_lines(line_nr, change.add_after)
347
            if change.change is not False:
348
                result.change_line(line_nr, change.change[0], change.change[1])
349
350
        return result
351
352
    def __bool__(self):
353
        """
354
        >>> bool(Diff([]))
355
        False
356
        >>> bool(Diff([], rename="some"))
357
        True
358
        >>> bool(Diff([], delete=True))
359
        True
360
        >>> bool(Diff.from_string_arrays(['1'], []))
361
        True
362
363
        :return: False if the patch has no effect at all when applied.
364
        """
365
        return (self.rename is not False or
366
                self.delete is True or
367
                len(self._changes) > 0)
368
369
    def delete_line(self, line_nr):
370
        """
371
        Mark the given line nr as deleted. The first line is line number 1.
372
        """
373
        linediff = self._get_change(line_nr)
374
        linediff.delete = True
375
        self._changes[line_nr] = linediff
376
377
    def delete_lines(self, line_nr_start, line_nr_end):
378
        """
379
        Delete lines in a specified range, inclusively.
380
        """
381
        for line_nr in range(line_nr_start, line_nr_end + 1):
382
            self.delete_line(line_nr)
383
384
    def add_lines(self, line_nr_before, lines):
385
        """
386
        Adds lines after the given line number.
387
388
        :param line_nr_before: Line number of the line before the additions.
389
                               Use 0 for insert lines before everything.
390
        :param lines:          A list of lines to add.
391
        """
392
        if lines == []:
393
            return  # No action
394
395
        linediff = self._get_change(line_nr_before, min_line=0)
396
        if linediff.add_after is not False:
397
            raise ConflictError("Cannot add lines after the given line since "
398
                                "there are already lines.")
399
400
        linediff.add_after = lines
401
        self._changes[line_nr_before] = linediff
402
403
    def change_line(self, line_nr, original_line, replacement):
404
        """
405
        Changes the given line with the given line number. The replacement will
406
        be there instead.
407
        """
408
        linediff = self._get_change(line_nr)
409
        if linediff.change is not False and linediff.change[1] != replacement:
410
            raise ConflictError("An already changed line cannot be changed.")
411
412
        linediff.change = (original_line, replacement)
413
        self._changes[line_nr] = linediff
414