Completed
Pull Request — master (#2655)
by
unknown
01:49
created

Diff.__add__()   F

Complexity

Conditions 11

Size

Total Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
dl 0
loc 29
rs 3.1764
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex methods like Diff.__add__() often do a lot of different things. To break such a method down, we need to identify a cohesive component within its enclosing class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import copy
2
import difflib
3
4
from coalib.results.LineDiff import LineDiff, ConflictError
5
from coalib.results.SourceRange import SourceRange
6
from coala_utils.decorators import enforce_signature, generate_eq
7
8
9
@generate_eq("_file", "modified", "rename", "delete")
10
class Diff:
11
    """
12
    A Diff result represents a difference for one file.
13
    """
14
15
    def __init__(self, file_list, filename=None, rename=False, delete=False):
        """
        Sets up a diff that does not contain any line changes yet.

        :param file_list: Lines of the original (unmodified) file, as a list.
        :param filename:  Name of the file as a string, or None.
        :param rename:    False, or a str holding the new name of the file.
        :param delete:    True if the file is set to be deleted.
        """
        self._file = file_list
        # Maps a line number to the LineDiff recorded for that line.
        self._changes = {}
        self.filename = filename
        # The two assignments below go through the property setters.
        self.rename = rename
        self.delete = delete
30
31
    @classmethod
    def from_string_arrays(cls, file_array_1, file_array_2, rename=False):
        """
        Builds the Diff that turns the first string array into the second.

        Applying the returned Diff to ``file_array_1`` yields
        ``file_array_2``.

        :param file_array_1: Original array
        :param file_array_2: Array to compare
        :param rename:       False or str containing new name of file.
        :return:             The Diff describing the difference.
        """
        diff = cls(file_array_1, rename=rename)
        matcher = difflib.SequenceMatcher(None, file_array_1, file_array_2)

        # Grouped opcodes are produced by a generator and carry less unneeded
        # information than get_opcodes would.
        for opcodes in matcher.get_grouped_opcodes(1):
            for tag, old_start, old_end, new_start, new_end in opcodes:
                if tag == "replace":
                    # The first old line is changed into the first new line,
                    # the remaining new lines are appended after it and the
                    # remaining old lines are deleted.
                    first_line = old_start + 1
                    diff.change_line(first_line,
                                     file_array_1[old_start],
                                     file_array_2[new_start])
                    diff.add_lines(first_line,
                                   file_array_2[new_start+1:new_end])
                    for line_nr in range(first_line + 1, old_end + 1):
                        diff.delete_line(line_nr)
                elif tag == "insert":
                    # The opcode inserts *before* its index while add_lines
                    # inserts *after* the given line, so no offset is added.
                    diff.add_lines(old_start,
                                   file_array_2[new_start:new_end])
                elif tag == "delete":
                    for line_nr in range(old_start + 1, old_end + 1):
                        diff.delete_line(line_nr)

        return diff
71
72
    @classmethod
    def from_clang_fixit(cls, fixit, file):
        """
        Builds a Diff out of a clang fixit and the contents of the file it
        applies to.

        :param fixit: A cindex.Fixit object.
        :param file:  A list (or tuple) of lines in the file to apply the
                      fixit to.
        :return:      The corresponding Diff object.
        """
        assert isinstance(file, (list, tuple))

        start = fixit.range.start
        end = fixit.range.end

        # All lines touched by the fixit, joined into a single string.
        affected_text = '\n'.join(file[start.line-1:end.line])
        # Offset of the end of the fixit range, expressed relative to the
        # length of the last affected line.
        end_offset = end.column - len(file[end.line-1]) - 1

        patched_text = (affected_text[:start.column-1] +
                        fixit.value +
                        affected_text[end_offset:])
        # Keep the container type (list or tuple) of the given file.
        patched_lines = type(file)(patched_text.splitlines(True))
        new_file = file[:start.line-1] + patched_lines + file[end.line:]

        return cls.from_string_arrays(file, new_file)
95
96
    def _get_change(self, line_nr, min_line=1):
        """
        Retrieves the LineDiff recorded for a line after validating the line
        number.

        :param line_nr:     The line number to look up.
        :param min_line:    The smallest line number that is accepted.
        :raises TypeError:  If line_nr is not an integer.
        :raises ValueError: If line_nr is smaller than min_line.
        :return:            The recorded LineDiff or, if there is none, a
                            new LineDiff (which is not stored by this
                            method).
        """
        if not isinstance(line_nr, int):
            raise TypeError("line_nr needs to be an integer.")
        if line_nr < min_line:
            raise ValueError("The given line number is not allowed.")

        if line_nr in self._changes:
            return self._changes[line_nr]

        return LineDiff()
103
104
    def stats(self):
        """
        Counts the added and the deleted lines of this diff.

        A changed line counts as one addition and one deletion.

        :return: A tuple ``(additions, deletions)``.
        """
        line_diffs = list(self._changes.values())

        changed = sum(1 for line_diff in line_diffs if line_diff.change)
        # A deletion is only counted on its own if the line is not changed.
        deleted = sum(1 for line_diff in line_diffs
                      if not line_diff.change and line_diff.delete)
        added = sum(len(line_diff.add_after) for line_diff in line_diffs
                    if line_diff.add_after)

        return changed + added, changed + deleted
119
120
    def __len__(self):
        """
        :return: The number of additions plus the number of deletions, as
                 reported by ``stats()``.
        """
        additions, deletions = self.stats()
        return additions + deletions
125
126
    @property
    def rename(self):
        """
        :return: The value assigned through the setter: a string with the new
                 name of the file, or False.
        """
        return self._rename

    @rename.setter
    @enforce_signature
    def rename(self, rename: (str, False)):
        """
        Stores the new name of the file.

        :param rename: A string containing the new name of the file, or
                       False.
        """
        self._rename = rename
140
141
    @property
    def delete(self):
        """
        :return: True if the file is set to be deleted.
        """
        return self._delete

    @delete.setter
    @enforce_signature
    def delete(self, delete: bool):
        """
        Marks or unmarks the file for deletion.

        :param delete: True if the file is set to be deleted, False
                       otherwise.
        """
        self._delete = delete
155
156
    @property
    def original(self):
        """
        :return: The original file, i.e. the very object that was handed to
                 the constructor as ``file_list``.
        """
        return self._file
162
163
    @property
    def modified(self):
        """
        Computes the file that results from applying this Diff to the
        original.

        :return: A new list of lines; it is empty if the file is set to be
                 deleted.
        """
        if self.delete:
            return []

        lines = []
        # Number of original lines that have been handled so far.
        consumed = 0

        # Line numbers count from 1, however 0 may occur as a key when lines
        # are inserted before everything.
        for line_nr in sorted(self._changes):
            linediff = self._changes[line_nr]

            # Take over the untouched lines in front of the current one.
            lines.extend(self._file[consumed:max(line_nr-1, 0)])

            if linediff.change:
                lines.append(linediff.change[1])
            elif not linediff.delete and line_nr > 0:
                # Neither changed nor deleted, the line is kept as it is.
                lines.append(self._file[line_nr-1])

            if linediff.add_after:
                lines.extend(linediff.add_after)

            consumed = line_nr

        lines.extend(self._file[consumed:])

        return lines
193
194
    @property
    def unified_diff(self):
        """
        Renders this patch as a unified diff between the original and the
        modified file.

        Note that the unified diff is not deterministic and thus not suitable
        for equality comparison.

        :return: The unified diff as a single string.
        """
        # The new name only shows up in the header if the file gets renamed.
        target_name = self.rename if isinstance(self.rename, str) else ''
        diff_lines = difflib.unified_diff(self.original,
                                          self.modified,
                                          tofile=target_name)
        return ''.join(diff_lines)
206
207
    def __json__(self):
        """
        Provides the JSON representation of this Diff, which is its unified
        diff since that is the easiest form for users to work with.

        :return: The unified diff string.
        """
        return self.unified_diff
213
214
    def affected_code(self, filename=None):
        """
        Collects SourceRange objects pointing to the code this Diff touches.
        Changes on continuous lines end up in one common SourceRange.

        :param filename:        The filename to associate the SourceRange's to.
        :raises AssertionError: Filename was not specified via parameter or
                                property
        :return:                A list of all related SourceRange objects.
        """
        assert filename or self.filename, "Filename not provided"

        filename = filename or self.filename

        # A distance of 0 only keeps directly adjacent changes together.
        return [piece.range(filename)
                for piece in self.split_diff(distance=0)]
230
231
    def split_diff(self, distance=1):
        """
        Splits this diff into small pieces, such that several continuously
        altered lines are still together in one diff. All subdiffs will be
        yielded. Every subdiff carries the original file, filename, rename
        and delete settings of this diff.

        A diff like this with changes being together closely won't be split:

        >>> diff = Diff.from_string_arrays([     'b', 'c', 'e'],
        ...                                ['a', 'b', 'd', 'f'])
        >>> len(list(diff.split_diff()))
        1

        If we set the distance to 0, it will be split:

        >>> len(list(diff.split_diff(distance=0)))
        2

        If a negative distance is given, every change will be yielded as an own
        diff, even if they are right beneath each other:

        >>> len(list(diff.split_diff(distance=-1)))
        3

        If a file gets renamed or deleted only, it will be yielded as is:

        >>> len(list(Diff([], rename='test').split_diff()))
        1

        An empty diff will not yield any diffs:

        >>> len(list(Diff([]).split_diff()))
        0

        :param distance: Number of unchanged lines that are allowed in between
                         two changed lines so they get yielded as one diff.
        """
        if not self:
            return

        def empty_piece():
            # The filename is handed on as well, so every piece still knows
            # which file it belongs to (e.g. for range() without arguments).
            return Diff(self._file,
                        filename=self.filename,
                        rename=self.rename,
                        delete=self.delete)

        last_line = -1
        this_diff = empty_piece()
        for line in sorted(self._changes):
            # Start a new piece once the gap to the previous changed line
            # exceeds the allowed distance.
            if line > last_line + distance + 1 and this_diff._changes:
                yield this_diff
                this_diff = empty_piece()

            last_line = line
            this_diff._changes[line] = self._changes[line]

        # If the diff contains no line changes the loop above does not run,
        # otherwise this_diff holds at least one change here. In both cases
        # it has to be yielded.
        yield this_diff
286
287
    def range(self, filename=None):
        """
        Determines a SourceRange that spans the whole Diff. If something is
        added after the 0th line (i.e. before the first line) the first line
        will be included in the SourceRange.

        The range of an empty diff will only affect the filename:

        >>> range = Diff([], filename="file").range()
        >>> range.file is None
        False
        >>> print(range.start.line)
        None

        In the interests of compatibility, you can still call this with a
        manually supplied filename as a parameter.

        >>> range = Diff([]).range("file")
        >>> range.file is None
        False
        >>> print(range.start.line)
        None

        :param filename:        None or the filename to associate the
                                SourceRange with.
        :raises AssertionError: Filename was not specified via parameter or
                                property
        :return:                A SourceRange object.
        """
        assert filename or self.filename, "Filename not provided"

        filename = filename or self.filename

        if not self._changes:
            return SourceRange.from_values(filename)

        # Line 0 (an insertion before everything) is clamped to line 1.
        first_line = max(1, min(self._changes))
        last_line = max(1, max(self._changes))
        return SourceRange.from_values(filename,
                                       start_line=first_line,
                                       end_line=last_line)
328
329
    def __add__(self, other):
        """
        Combines this diff with another one into a new diff. Neither operand
        is modified, the result is based on a deep copy of this diff.

        :param other:          The Diff to add.
        :raises TypeError:     If other is not a Diff.
        :raises ConflictError: If both diffs rename the file differently, if
                               they have different filenames or if their
                               line changes cannot be combined.
        :return:               The combined Diff.
        """
        if not isinstance(other, Diff):
            raise TypeError("Only diffs can be added to a diff.")

        # Two renamings only conflict if both are set and they differ.
        if (False not in (self.rename, other.rename) and
                self.rename != other.rename):
            raise ConflictError("Diffs contain conflicting renamings.")

        if (self.filename and
                other.filename and
                self.filename != other.filename):
            raise ConflictError("Diffs are for different files")

        combined = copy.deepcopy(self)
        combined.rename = self.rename or other.rename
        combined.delete = self.delete or other.delete

        # Replay every line change of the other diff onto the copy.
        for line_nr, line_diff in other._changes.items():
            if line_diff.delete is True:
                combined.delete_line(line_nr)
            if line_diff.add_after is not False:
                combined.add_lines(line_nr, line_diff.add_after)
            if line_diff.change is not False:
                old_line, new_line = line_diff.change[0], line_diff.change[1]
                combined.change_line(line_nr, old_line, new_line)

        return combined
358
359
    def __bool__(self):
        """
        >>> bool(Diff([]))
        False
        >>> bool(Diff([], rename="some"))
        True
        >>> bool(Diff([], delete=True))
        True
        >>> bool(Diff.from_string_arrays(['1'], []))
        True

        :return: False if the patch has no effect at all when applied.
        """
        if self.rename is not False:
            return True
        if self.delete is True:
            return True

        return len(self._changes) > 0
375
376
    def delete_line(self, line_nr):
        """
        Flags the line with the given number as deleted.

        :param line_nr: Number of the line to delete, the first line of the
                        file being line number 1.
        """
        line_diff = self._get_change(line_nr)
        line_diff.delete = True
        # The LineDiff may be a new one, so it is (re)registered in any case.
        self._changes[line_nr] = line_diff
383
384
    def delete_lines(self, line_nr_start, line_nr_end):
        """
        Flags all lines of the given range as deleted.

        :param line_nr_start: Number of the first line to delete.
        :param line_nr_end:   Number of the last line to delete (inclusive).
        """
        line_nr = line_nr_start
        while line_nr <= line_nr_end:
            self.delete_line(line_nr)
            line_nr += 1
390
391
    def add_lines(self, line_nr_before, lines):
        """
        Adds lines after the given line number.

        Passing an empty list or an empty tuple is a no-op: nothing is
        recorded and no conflict is reported.

        :param line_nr_before: Line number of the line before the additions.
                               Use 0 for insert lines before everything.
        :param lines:          A list of lines to add.
        :raises ConflictError: If lines were already added after the given
                               line.
        """
        # Slicing a tuple of lines yields an empty tuple, which has to be
        # treated exactly like an empty list.
        if isinstance(lines, (list, tuple)) and not lines:
            return  # No action

        linediff = self._get_change(line_nr_before, min_line=0)
        if linediff.add_after is not False:
            raise ConflictError("Cannot add lines after the given line since "
                                "there are already lines.")

        linediff.add_after = lines
        self._changes[line_nr_before] = linediff
409
410
    def change_line(self, line_nr, original_line, replacement):
        """
        Records that the line with the given number gets replaced.

        :param line_nr:        Number of the line to change.
        :param original_line:  The content of the line before the change.
        :param replacement:    The content that will be there instead.
        :raises ConflictError: If the line was already changed to a different
                               replacement.
        """
        line_diff = self._get_change(line_nr)
        previous = line_diff.change
        # Recording the very same replacement again is not a conflict.
        if previous is not False and previous[1] != replacement:
            raise ConflictError("An already changed line cannot be changed.")

        line_diff.change = (original_line, replacement)
        self._changes[line_nr] = line_diff
421