Completed
Pull Request — master (#2196)
by Lasse
01:56
created

Diff.range()   B

Complexity

Conditions 2

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 24
rs 8.9713
1
import copy
2
import difflib
3
4
from coalib.results.LineDiff import LineDiff, ConflictError
5
from coalib.results.SourceRange import SourceRange
6
from coala_decorators.decorators import enforce_signature, generate_eq
7
8
9
@generate_eq("_file", "modified", "rename", "delete")
10
class Diff:
11
    """
12
    A Diff result represents a difference for one file.
13
    """
14
15
    def __init__(self, file_list, rename=False, delete=False):
        """
        Sets up a diff for one file that does not contain any line changes
        yet.

        :param file_list: The lines of the original, untouched file.
        :param rename:    The new name of the file as a str, or False if the
                          file keeps its name.
        :param delete:    True if the whole file shall be deleted.
        """
        self._file = file_list
        # Line changes of this diff, keyed by line number.
        self._changes = {}

        # Both assignments below go through the property setters of this
        # class.
        self.rename = rename
        self.delete = delete
28
29
    @classmethod
    def from_string_arrays(cls, file_array_1, file_array_2, rename=False):
        """
        Builds the Diff that turns ``file_array_1`` into ``file_array_2``.

        Applying the returned Diff to the first array yields the second one.

        :param file_array_1: Original array
        :param file_array_2: Array to compare
        :param rename:       False or str containing new name of file.
        :return:             The Diff holding all detected line changes.
        """
        diff = cls(file_array_1, rename=rename)
        matcher = difflib.SequenceMatcher(None, file_array_1, file_array_2)

        # The grouped opcodes are produced by a generator and carry less
        # unneeded information than get_opcodes.
        for group in matcher.get_grouped_opcodes(1):
            for tag, a_start, a_end, b_start, b_end in group:
                # Opcode indices count from 0, line numbers of a Diff from 1.
                first_line = a_start + 1

                if tag == "insert":
                    # Lines get added *after* a line number here while
                    # difflib inserts *before* an index, so no offset.
                    diff.add_lines(a_start, file_array_2[b_start:b_end])
                elif tag == "delete":
                    for line_nr in range(first_line, a_end + 1):
                        diff.delete_line(line_nr)
                elif tag == "replace":
                    # The first old line becomes the first new line, the
                    # remaining new lines are appended after it and the
                    # remaining old lines are dropped.
                    diff.change_line(first_line,
                                     file_array_1[a_start],
                                     file_array_2[b_start])
                    diff.add_lines(first_line,
                                   file_array_2[b_start + 1:b_end])
                    for line_nr in range(first_line + 1, a_end + 1):
                        diff.delete_line(line_nr)

        return diff
69
70
    @classmethod
    def from_clang_fixit(cls, fixit, file):
        """
        Builds a Diff out of a clang fixit and the contents of the file the
        fixit refers to.

        :param fixit: A cindex.Fixit object.
        :param file:  A list of lines in the file to apply the fixit to.
        :return:      The corresponding Diff object.
        """
        assert isinstance(file, (list, tuple))

        # The location attributes are used as 1-based line/column numbers.
        start = fixit.range.start
        end = fixit.range.end

        affected = '\n'.join(file[start.line - 1:end.line])
        # Offset of the untouched remainder; presumably negative in the usual
        # case, i.e. counted from the end of the joined text - TODO confirm.
        tail_index = end.column - len(file[end.line - 1]) - 1

        patched = (affected[:start.column - 1] +
                   fixit.value +
                   affected[tail_index:])
        # Keep the container type (list or tuple) of the given file.
        patched_lines = type(file)(patched.splitlines(True))
        new_file = file[:start.line - 1] + patched_lines + file[end.line:]

        return cls.from_string_arrays(file, new_file)
93
94
    def _get_change(self, line_nr, min_line=1):
        """
        Looks up the change stored for a line, validating the line number
        first.

        :param line_nr:   The line number to look up.
        :param min_line:  The smallest line number that is accepted.
        :return:          The stored change for that line or a fresh
                          LineDiff() if nothing is stored (the fresh object
                          is not saved in the diff).
        :raises TypeError:  If line_nr is not an integer.
        :raises ValueError: If line_nr is smaller than min_line.
        """
        if not isinstance(line_nr, int):
            raise TypeError("line_nr needs to be an integer.")
        if line_nr < min_line:
            raise ValueError("The given line number is not allowed.")

        fallback = LineDiff()
        return self._changes.get(line_nr, fallback)
101
102
    def stats(self):
        """
        Counts the added and removed lines of this diff.

        A changed line counts as one addition and one deletion.

        :return: A tuple ``(additions, deletions)``.
        """
        line_diffs = list(self._changes.values())

        changed = sum(1 for entry in line_diffs if entry.change)
        # A deletion flag is only counted when the line is not changed.
        removed = sum(1 for entry in line_diffs
                      if not entry.change and entry.delete)
        inserted = sum(len(entry.add_after) for entry in line_diffs
                       if entry.add_after)

        return changed + inserted, changed + removed
117
118
    def __len__(self):
        """
        :return: The number of additions plus the number of deletions, as
                 reported by ``stats()``.
        """
        additions, deletions = self.stats()
        return additions + deletions
123
124
    @property
    def rename(self):
        """
        :return: The stored rename value: the new name of the file as a
                 string, or False.
        """
        return self._rename
130
131
    @rename.setter
    @enforce_signature
    def rename(self, rename: (str, False)):
        """
        Stores the rename value of this diff.

        :param rename: The new name of the file as a string, or False.
        """
        self._rename = rename
138
139
    @property
    def delete(self):
        """
        :return: The stored deletion flag; True means the file is set to be
                 deleted.
        """
        return self._delete
145
146
    @delete.setter
    @enforce_signature
    def delete(self, delete: bool):
        """
        Stores the deletion flag of this diff.

        :param delete: True if the file is set to be deleted, False
                       otherwise.
        """
        self._delete = delete
153
154
    @property
    def original(self):
        """
        :return: The original file this diff was created with (the stored
                 object itself, not a copy).
        """
        return self._file
160
161
    @property
    def modified(self):
        """
        Applies all changes of this diff to the original file.

        :return: The resulting file as a list of lines; an empty list if the
                 file is set to be deleted.
        """
        output = []
        if self.delete:
            return output

        # Number of original lines that are already handled.
        consumed = 0

        # Line numbers count from 1. The key 0 may occur as well, it holds
        # lines that are inserted before everything else.
        for line_nr in sorted(self._changes):
            # Copy the untouched lines in front of this change.
            output.extend(self._file[consumed:max(line_nr - 1, 0)])

            entry = self._changes[line_nr]
            if entry.change:
                output.append(entry.change[1])
            elif not entry.delete and line_nr > 0:
                # Only additions are attached to this line, keep it as is.
                output.append(self._file[line_nr - 1])

            if entry.add_after:
                output.extend(entry.add_after)

            consumed = line_nr

        # Copy everything behind the last change.
        output.extend(self._file[consumed:])

        return output
191
192
    @property
    def unified_diff(self):
        """
        Renders this patch as a unified diff between the original and the
        modified file.

        Note that the unified diff is not deterministic and thus not suitable
        for equality comparison.

        :return: The unified diff as one string.
        """
        # Only a real new name is used as the target file name.
        target_name = self.rename if isinstance(self.rename, str) else ''
        diff_lines = difflib.unified_diff(self.original,
                                          self.modified,
                                          tofile=target_name)
        return ''.join(diff_lines)
204
205
    def __json__(self):
        """
        Provides the value used for JSON export. The unified diff is used
        since it is the easiest representation for the users.

        :return: The unified diff of this patch.
        """
        return self.unified_diff
211
212
    def affected_code(self, filename):
        """
        Collects the ranges of all code pieces this diff touches. Changes on
        continuous lines end up in one common range.

        :param filename: The filename to associate the ranges with.
        :return:         A list holding the result of ``range(filename)``
                         for every piece yielded by
                         ``split_diff(distance=0)``.
        """
        return [piece.range(filename)
                for piece in self.split_diff(distance=0)]
222
223
    def split_diff(self, distance=1):
        """
        Cuts this diff into smaller diffs and yields them one by one. Lines
        that are altered close to each other stay together in one piece.

        A diff like this with changes being together closely won't be split:

        >>> diff = Diff.from_string_arrays([     'b', 'c', 'e'],
        ...                                ['a', 'b', 'd', 'f'])
        >>> len(list(diff.split_diff()))
        1

        If we set the distance to 0, it will be split:

        >>> len(list(diff.split_diff(distance=0)))
        2

        If a negative distance is given, every change will be yielded as an own
        diff, even if they are right beneath each other:

        >>> len(list(diff.split_diff(distance=-1)))
        3

        If a file gets renamed or deleted only, it will be yielded as is:

        >>> len(list(Diff([], rename='test').split_diff()))
        1

        An empty diff will not yield any diffs:

        >>> len(list(Diff([]).split_diff()))
        0

        :param distance: Number of unchanged lines that are allowed in between
                         two changed lines so they get yielded as one diff.
        """
        if not self:
            # Nothing to yield for a diff without any effect.
            return ()

        def new_piece():
            # Every piece refers to the same file and keeps the rename and
            # delete settings of this diff.
            return Diff(self._file, rename=self.rename, delete=self.delete)

        piece = new_piece()
        previous_line = -1
        for line_nr in sorted(self._changes):
            gap_too_big = line_nr > previous_line + distance + 1
            if gap_too_big and piece._changes:
                yield piece
                piece = new_piece()

            piece._changes[line_nr] = self._changes[line_nr]
            previous_line = line_nr

        # Without line changes the loop did not run and the piece carries the
        # rename/delete only. Otherwise the last piece holds at least one
        # change. Either way it has to be yielded.
        yield piece
278
279
    def range(self, filename):
        """
        Determines the SourceRange reaching from the first to the last
        changed line of this Diff. Additions after the 0th line (i.e. before
        the first line) are treated as if they belong to the first line.

        The range of an empty diff will only affect the filename:

        >>> range = Diff([]).range("file")
        >>> range.file is None
        False
        >>> range.start.line  # None

        :param filename: The filename to associate the SourceRange with.
        :return:         A SourceRange object.
        """
        if not self._changes:
            return SourceRange.from_values(filename)

        first_line = min(self._changes)
        last_line = max(self._changes)
        # Line number 0 is lifted to 1 on both ends.
        return SourceRange.from_values(filename,
                                       start_line=max(1, first_line),
                                       end_line=max(1, last_line))
303
304
    def __add__(self, other):
        """
        Combines this diff with another one into a new diff. Neither of the
        two operands is modified.

        :param other: The Diff to add.
        :return:      A new Diff containing the changes of both diffs.
        :raises TypeError:     If other is not a Diff.
        :raises ConflictError: If both diffs rename the file to different
                               names. Conflicting line changes are reported
                               by the line modification methods that are
                               called here.
        """
        if not isinstance(other, Diff):
            raise TypeError("Only diffs can be added to a diff.")

        both_rename = False not in (self.rename, other.rename)
        if both_rename and self.rename != other.rename:
            raise ConflictError("Diffs contain conflicting renamings.")

        merged = copy.deepcopy(self)
        merged.rename = self.rename or other.rename
        merged.delete = self.delete or other.delete

        # Replay every line change of the other diff on the copy.
        for line_nr, change in other._changes.items():
            if change.delete is True:
                merged.delete_line(line_nr)
            if change.add_after is not False:
                merged.add_lines(line_nr, change.add_after)
            if change.change is not False:
                original_line = change.change[0]
                replacement = change.change[1]
                merged.change_line(line_nr, original_line, replacement)

        return merged
330
331
    def __bool__(self):
        """
        >>> bool(Diff([]))
        False
        >>> bool(Diff([], rename="some"))
        True
        >>> bool(Diff([], delete=True))
        True

        :return: False if the patch has no effect at all when applied.
        """
        if self.rename is not False:
            return True
        if self.delete is True:
            return True
        # Neither renamed nor deleted: only line changes can have an effect.
        return len(self._changes) > 0
345
346
    def delete_line(self, line_nr):
        """
        Flags the line with the given number as deleted. Line numbers start
        at 1.

        :param line_nr: The number of the line to delete.
        """
        entry = self._get_change(line_nr)
        entry.delete = True
        # Store the entry, it may have been created just now.
        self._changes[line_nr] = entry
353
354
    def add_lines(self, line_nr_before, lines):
        """
        Adds lines after the given line number.

        An empty list or an empty tuple of lines is ignored completely, so
        no change gets registered for the line in that case.

        :param line_nr_before: Line number of the line before the additions.
                               Use 0 for insert lines before everything.
        :param lines:          A list of lines to add.
        :raises ConflictError: If lines were already added after the given
                               line.
        """
        # Slicing a tuple of lines yields ``()``, which does not compare
        # equal to ``[]``. Checking the type and the emptiness separately
        # makes sure such an empty addition is a no-op, too.
        if isinstance(lines, (list, tuple)) and not lines:
            return  # No action

        linediff = self._get_change(line_nr_before, min_line=0)
        if linediff.add_after is not False:
            raise ConflictError("Cannot add lines after the given line since "
                                "there are already lines.")

        linediff.add_after = lines
        self._changes[line_nr_before] = linediff
372
373
    def change_line(self, line_nr, original_line, replacement):
        """
        Replaces the line with the given number by another line.

        :param line_nr:       The number of the line to change.
        :param original_line: The line as it is in the original file.
        :param replacement:   The line that will be there instead.
        :raises ConflictError: If a change is already stored for this line.
        """
        entry = self._get_change(line_nr)
        already_changed = entry.change is not False
        if already_changed:
            raise ConflictError("An already changed line cannot be changed.")

        entry.change = (original_line, replacement)
        self._changes[line_nr] = entry
384