Completed
Pull Request — master (#2655)
by
unknown
01:46
created

Diff.__add__()   F

Complexity

Conditions 11

Size

Total Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
dl 0
loc 29
rs 3.1764
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like Diff.__add__() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import copy
2
import difflib
3
4
from coalib.results.LineDiff import LineDiff, ConflictError
5
from coalib.results.SourceRange import SourceRange
6
from coala_utils.decorators import enforce_signature, generate_eq
7
from coalib.output.printers.LOG_LEVEL import LOG_LEVEL
0 ignored issues
show
Unused Code introduced by
Unused LOG_LEVEL imported from coalib.output.printers.LOG_LEVEL
Loading history...
8
from coalib.output.printers.LogPrinter import LogPrinter
0 ignored issues
show
Unused Code introduced by
Unused LogPrinter imported from coalib.output.printers.LogPrinter
Loading history...
9
from pyprint.ConsolePrinter import ConsolePrinter
0 ignored issues
show
Unused Code introduced by
Unused ConsolePrinter imported from pyprint.ConsolePrinter
Loading history...
10
11
12
@generate_eq("_file", "modified", "rename", "delete")
13
class Diff:
14
    """
15
    A Diff result represents a difference for one file.
16
    """
17
18
    def __init__(self, file_list, filename=None, rename=False, delete=False):
        """
        Sets up a diff that does not contain any line changes yet.

        :param file_list: Lines of the original (unmodified) file.
        :param filename:  Name of the file this diff refers to, or None.
        :param rename:    New name of the file as a str, or False if the
                          file keeps its name.
        :param delete:    True if the whole file is to be deleted.
        """
        self._file = file_list
        self._changes = {}
        self.filename = filename

        # These two assignments go through the property setters of the class.
        self.rename = rename
        self.delete = delete
34
35
    @classmethod
    def from_string_arrays(cls, file_array_1, file_array_2, rename=False):
        """
        Builds the Diff that turns ``file_array_1`` into ``file_array_2``.

        :param file_array_1: Original array of lines.
        :param file_array_2: Array of lines the diff should produce.
        :param rename:       False or str containing new name of file.
        :return:             The Diff describing the transformation.
        """
        diff = cls(file_array_1, rename=rename)
        opcode_groups = difflib.SequenceMatcher(
            None, file_array_1, file_array_2).get_grouped_opcodes(1)

        # Grouped opcodes are produced lazily; any other tag (such as the
        # surrounding "equal" context) is simply skipped.
        for group in opcode_groups:
            for tag, old_start, old_end, new_start, new_end in group:
                if tag == "delete":
                    # Opcode indices are 0-based, diff line numbers 1-based.
                    diff.delete_lines(old_start + 1, old_end)
                elif tag == "insert":
                    # The opcode inserts *before* an index while add_lines
                    # adds *after* a line number, so no offset is needed.
                    diff.add_lines(old_start,
                                   file_array_2[new_start:new_end])
                elif tag == "replace":
                    # The first old line is changed into the first new line,
                    # remaining new lines are appended after it and the
                    # remaining old lines are removed.
                    first_line_nr = old_start + 1
                    diff.change_line(first_line_nr,
                                     file_array_1[old_start],
                                     file_array_2[new_start])
                    diff.add_lines(first_line_nr,
                                   file_array_2[new_start + 1:new_end])
                    diff.delete_lines(first_line_nr + 1, old_end)

        return diff
75
76
    @classmethod
    def from_clang_fixit(cls, fixit, file):
        """
        Builds a Diff out of a clang fixit and the contents of the file.

        :param fixit: A cindex.Fixit object.
        :param file:  A list (or tuple) of the lines of the file the fixit
                      applies to.
        :return:      The corresponding Diff object.
        """
        assert isinstance(file, (list, tuple))

        first_line = fixit.range.start.line
        last_line = fixit.range.end.line

        affected = '\n'.join(file[first_line-1:last_line])
        # Offset of the untouched tail inside ``affected``; this is presumably
        # negative (i.e. counted from the end) for columns within the line.
        tail_start = fixit.range.end.column - len(file[last_line-1])-1

        patched = (affected[:fixit.range.start.column-1] +
                   fixit.value +
                   affected[tail_start:])

        # Rebuild the file in the same container type that was passed in.
        untouched_head = file[:first_line-1]
        untouched_tail = file[last_line:]
        new_file = (untouched_head +
                    type(file)(patched.splitlines(True)) +
                    untouched_tail)

        return cls.from_string_arrays(file, new_file)
99
100
    def _get_change(self, line_nr, min_line=1):
        """
        Looks up the LineDiff recorded for the given line.

        :param line_nr:     The line number to look up.
        :param min_line:    The smallest line number that is accepted.
        :raises TypeError:  If ``line_nr`` is not an integer.
        :raises ValueError: If ``line_nr`` is smaller than ``min_line``.
        :return:            The recorded LineDiff, or a fresh LineDiff (which
                            is not stored) if nothing is recorded yet.
        """
        if not isinstance(line_nr, int):
            raise TypeError("line_nr needs to be an integer.")
        if line_nr < min_line:
            raise ValueError("The given line number is not allowed.")

        fallback = LineDiff()
        return self._changes.get(line_nr, fallback)
107
108
    def stats(self):
        """
        Counts the added and the removed lines of this diff.

        :return: A tuple ``(additions, deletions)``.
        """
        added = removed = 0
        for entry in self._changes.values():
            if entry.change:
                # A changed line counts as one removal plus one addition.
                added += 1
                removed += 1
            elif entry.delete:
                removed += 1

            if entry.add_after:
                added += len(entry.add_after)

        return added, removed
123
124
    def __len__(self):
        """
        :return: The number of additions plus the number of deletions.
        """
        additions, deletions = self.stats()
        return additions + deletions
129
130
    @property
    def rename(self):
        """
        :return: The new name of the file as a string, or False if the file
                 is not renamed.
        """
        return self._rename

    @rename.setter
    @enforce_signature
    def rename(self, rename: (str, False)):
        """
        :param rename: String containing the new name of the file, or False.
        """
        self._rename = rename
144
145
    @property
    def delete(self):
        """
        :return: True if the file is marked for deletion.
        """
        return self._delete

    @delete.setter
    @enforce_signature
    def delete(self, delete: bool):
        """
        :param delete: True to mark the file for deletion, False otherwise.
        """
        self._delete = delete
159
160
    @property
    def original(self):
        """
        :return: The original file this diff was created with.
        """
        return self._file
166
167
    @property
    def modified(self):
        """
        Applies the diff to the original file.

        :return: The list of lines of the modified file; an empty list if
                 the file is marked for deletion.
        """
        if self.delete:
            return []

        output = []
        consumed = 0  # Number of original lines already handled.

        # Line numbers count from 1; the key 0 only occurs for lines that
        # are inserted before everything else.
        for line_nr, linediff in sorted(self._changes.items()):
            # Copy the untouched lines in front of this change.
            output += self._file[consumed:max(line_nr-1, 0)]

            if linediff.change:
                output.append(linediff.change[1])
            elif not linediff.delete and line_nr > 0:
                # Only additions are recorded here, the line itself stays.
                output.append(self._file[line_nr-1])

            if linediff.add_after:
                output += linediff.add_after

            consumed = line_nr

        output += self._file[consumed:]

        return output
197
198
    @property
    def unified_diff(self):
        """
        Renders this patch as a unified diff.

        Note that the unified diff is not deterministic and thus not suitable
        for equality comparison.

        :return: The unified diff as one string.
        """
        # Only a real new name is used as target file name.
        target_name = self.rename if isinstance(self.rename, str) else ''
        diff_lines = difflib.unified_diff(self.original,
                                          self.modified,
                                          tofile=target_name)
        return ''.join(diff_lines)
210
211
    def __json__(self):
        """
        Customizes the JSON export: the diff is exported as its unified diff
        because that is the easiest representation for users.

        :return: The unified diff string of this diff.
        """
        return self.unified_diff
217
218
    def affected_code(self, filename=None):
        """
        Collects the SourceRange objects pointing to the code touched by this
        diff. Changes on continuous lines end up in one SourceRange.

        :param filename:        The filename to associate the SourceRange's to.
        :raises AssertionError: Filename was not specified via parameter or
                                property
        :return:                A list of all related SourceRange objects.
        """
        assert filename or self.filename, "Filename not provided"

        filename = filename or self.filename

        # A distance of 0 keeps only directly adjacent changes together.
        return [piece.range(filename)
                for piece in self.split_diff(distance=0)]
234
235
    def split_diff(self, distance=1):
        """
        Yields this diff in small pieces, keeping continuously altered lines
        together in one piece.

        A diff like this with changes being together closely won't be split:

        >>> diff = Diff.from_string_arrays([     'b', 'c', 'e'],
        ...                                ['a', 'b', 'd', 'f'])
        >>> len(list(diff.split_diff()))
        1

        If we set the distance to 0, it will be split:

        >>> len(list(diff.split_diff(distance=0)))
        2

        If a negative distance is given, every change will be yielded as an own
        diff, even if they are right beneath each other:

        >>> len(list(diff.split_diff(distance=-1)))
        3

        If a file gets renamed or deleted only, it will be yielded as is:

        >>> len(list(Diff([], rename='test').split_diff()))
        1

        An empty diff will not yield any diffs:

        >>> len(list(Diff([]).split_diff()))
        0

        :param distance: Number of unchanged lines that are allowed in between
                         two changed lines so they get yielded as one diff.
        """
        if not self:
            return

        def new_piece():
            # Every piece carries the file level information of this diff.
            return Diff(self._file, rename=self.rename, delete=self.delete)

        piece = new_piece()
        previous_line = -1
        for line in sorted(self._changes):
            gap_too_wide = line > previous_line + distance + 1
            if gap_too_wide and piece._changes:
                yield piece
                piece = new_piece()

            piece._changes[line] = self._changes[line]
            previous_line = line

        # Without line changes the loop body never runs and the piece only
        # holds the rename/delete information; otherwise the piece holds at
        # least one change. Either way it has to be yielded.
        yield piece
290
291
    def range(self, filename=None):
        """
        Computes a SourceRange that spans the whole Diff. If lines are added
        after the 0th line (i.e. before the first line), the first line is
        included in the SourceRange.

        The range of an empty diff will only affect the filename:

        >>> range = Diff([], filename="file").range()
        >>> range.file is None
        False
        >>> print(range.start.line)
        None

        In the interests of compatibility, you can still call this with a
        manually supplied filename as a parameter.

        >>> range = Diff([]).range("file")
        >>> range.file is None
        False
        >>> print(range.start.line)
        None

        :param filename:        None or the filename to associate the
                                SourceRange with.
        :raises AssertionError: Filename was not specified via parameter or
                                property
        :return:                A SourceRange object.
        """
        assert filename or self.filename, "Filename not provided"

        filename = filename or self.filename

        if not self._changes:
            return SourceRange.from_values(filename)

        changed_lines = self._changes.keys()
        # Line 0 (additions before everything) is clamped to line 1.
        first = max(1, min(changed_lines))
        last = max(1, max(changed_lines))
        return SourceRange.from_values(filename,
                                       start_line=first,
                                       end_line=last)
332
333
    def _check_addable(self, other):
        """
        Verifies that ``other`` can be added to this diff.

        :param other:          The object that is about to be added.
        :raises TypeError:     If ``other`` is not a Diff.
        :raises ConflictError: If both diffs rename the file to different
                               names or belong to different files.
        """
        if not isinstance(other, Diff):
            raise TypeError("Only diffs can be added to a diff.")

        # A rename only conflicts if *both* diffs rename the file.
        renames = (self.rename, other.rename)
        if False not in renames and self.rename != other.rename:
            raise ConflictError("Diffs contain conflicting renamings.")

        # A diff without a filename is compatible with any file.
        filenames = (self.filename, other.filename)
        if all(filenames) and self.filename != other.filename:
            raise ConflictError("Diffs are for different files")

    def __add__(self, other):
        """
        Adds another diff to this one. (This will *not* be done in place.)

        :param other:          The Diff to add.
        :raises TypeError:     If ``other`` is not a Diff.
        :raises ConflictError: If the diffs contain conflicting renamings,
                               belong to different files or contain line
                               changes that cannot be combined.
        :return:               A new Diff containing the changes of both.
        """
        self._check_addable(other)

        result = copy.deepcopy(self)
        result.rename = self.rename or other.rename
        result.delete = self.delete or other.delete
        # Keep a filename that only ``other`` knows. Otherwise the check for
        # different files would depend on the order of chained additions.
        if other.filename and not result.filename:
            result.filename = other.filename

        # Replay every recorded change of ``other`` through the public
        # mutators so that their conflict detection applies.
        for line_nr, change in other._changes.items():
            if change.delete is True:
                result.delete_line(line_nr)
            if change.add_after is not False:
                result.add_lines(line_nr, change.add_after)
            if change.change is not False:
                result.change_line(line_nr, change.change[0], change.change[1])

        return result
362
363
    def __bool__(self):
        """
        >>> bool(Diff([]))
        False
        >>> bool(Diff([], rename="some"))
        True
        >>> bool(Diff([], delete=True))
        True
        >>> bool(Diff.from_string_arrays(['1'], []))
        True

        :return: False if the patch has no effect at all when applied.
        """
        if self.rename is not False:
            return True
        if self.delete is True:
            return True
        return len(self._changes) > 0
379
380
    def delete_line(self, line_nr):
        """
        Marks the line with the given number as deleted.

        :param line_nr: Number of the line to delete; the first line of the
                        file is line number 1.
        """
        entry = self._get_change(line_nr)
        entry.delete = True
        # Store it, the entry may be a fresh one that is not recorded yet.
        self._changes[line_nr] = entry
387
388
    def delete_lines(self, line_nr_start, line_nr_end):
        """
        Marks all lines of the given range as deleted.

        :param line_nr_start: Number of the first line to delete.
        :param line_nr_end:   Number of the last line to delete (inclusive).
        """
        line_nr = line_nr_start
        while line_nr <= line_nr_end:
            self.delete_line(line_nr)
            line_nr += 1
394
395
    def add_lines(self, line_nr_before, lines):
        """
        Adds lines after the given line number.

        :param line_nr_before: Line number of the line before the additions.
                               Use 0 for insert lines before everything.
        :param lines:          A list of lines to add. An empty sequence is
                               ignored.
        :raises ConflictError: If lines were already added after the given
                               line.
        """
        # Any empty sequence is a no-op, not only the empty list. Diffs built
        # from tuples pass empty tuples here; recording those would make the
        # diff look non-empty and block later additions on the same line.
        if not lines:
            return  # No action

        linediff = self._get_change(line_nr_before, min_line=0)
        if linediff.add_after is not False:
            raise ConflictError("Cannot add lines after the given line since "
                                "there are already lines.")

        linediff.add_after = lines
        self._changes[line_nr_before] = linediff
413
414
    def change_line(self, line_nr, original_line, replacement):
        """
        Replaces the line with the given number by ``replacement``.

        :param line_nr:        Number of the line to change.
        :param original_line:  The line as it is in the original file.
        :param replacement:    The line that shall be there instead.
        :raises ConflictError: If the line was already changed to something
                               other than ``replacement``.
        """
        entry = self._get_change(line_nr)
        previous = entry.change
        # Changing a line to the same replacement again is not a conflict.
        if previous is not False and previous[1] != replacement:
            raise ConflictError("An already changed line cannot be changed.")

        entry.change = (original_line, replacement)
        self._changes[line_nr] = entry
425