Completed
Pull Request — master (#2100)
by Udayan
01:53
created

ensure_files_present()   D

Complexity

Conditions 8

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 8
dl 0
loc 30
rs 4
c 2
b 0
f 0
1
import copy
2
from difflib import SequenceMatcher
3
4
from coalib.results.Diff import ConflictError, Diff
5
from coalib.results.SourceRange import SourceRange
6
7
8
def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Selects the results of the modified files that have no equivalent among
    the results of the original files.

    Both file dicts are handed to ``ensure_files_present`` first, which may
    add missing keys to them in place.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old changes;
                               they are collected while walking
                               ``modified_results`` backwards
    """
    renamed_files = ensure_files_present(original_file_dict,
                                         modified_file_dict)

    # For every original file: the diff that leads to its (possibly renamed)
    # counterpart among the modified files.
    diffs_dict = {
        file_name: Diff.from_string_arrays(
            original_file_dict[file_name],
            modified_file_dict[renamed_files.get(file_name, file_name)])
        for file_name in original_file_dict}

    # Per result and per file: the diff that removes the result's code.
    old_removal_diffs = remove_result_ranges_diffs(original_results,
                                                   original_file_dict)
    new_removal_diffs = remove_result_ranges_diffs(modified_results,
                                                   modified_file_dict)

    def has_counterpart(new_result):
        # True as soon as one original result matches completely; the
        # source ranges are only compared when the basic properties agree.
        return any(
            basics_match(old_result, new_result, renamed_files) and
            source_ranges_match(original_file_dict,
                                diffs_dict,
                                old_removal_diffs[old_result],
                                new_removal_diffs[new_result],
                                renamed_files)
            for old_result in original_results)

    return [new_result
            for new_result in reversed(modified_results)
            if not has_counterpart(new_result)]
64
65
66
def basics_match(original_result,
                 modified_result,
                 renamed_files):
    """
    Compares the basic properties of two results:

    * origin
    * message
    * severity
    * debug_msg

    The origin of the original result is looked up in ``renamed_files``
    first; if it is a key there, the mapped value is what has to equal the
    origin of the modified result.

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :param renamed_files:   A dictionary containing file renamings across runs
    :return:                Boolean value whether or not the properties match
    """
    compared_members = ('message', 'severity', 'debug_msg')
    members_equal = all(
        getattr(original_result, name) == getattr(modified_result, name)
        for name in compared_members)

    old_origin = original_result.origin
    expected_origin = renamed_files.get(old_origin, old_origin)
    origins_equal = expected_origin == modified_result.origin

    return members_equal and origins_equal
94
95
96
def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict,
                        renamed_files):
    """
    Checks whether the SourceRanges of two results match.

    For every original file, the file diff and the diff of the original
    result are combined; the outcome has to equal the outcome of the diff of
    the modified result for the (possibly renamed) counterpart file.

    :param original_file_dict:        Dict of lists of file contents before
                                      changes
    :param diff_dict:                 Dict of diffs describing the changes
                                      per file
    :param original_result_diff_dict: Diff for each file for the original
                                      result
    :param modified_result_diff_dict: Diff for each file for the modified
                                      result
    :param renamed_files:             A dictionary containing file renamings
                                      across runs
    :return:                          Boolean value whether the SourceRanges
                                      match
    """
    for old_name in original_file_dict:
        new_name = renamed_files.get(old_name, old_name)

        try:
            # Combining the diffs fails if the affected range of the result
            # gets modified by the file diff.
            combined_diff = (diff_dict[old_name] +
                             original_result_diff_dict[old_name])
        except ConflictError:
            return False

        # Original file with the file diff and the result diff applied,
        # compared to the modified file with its result diff applied.
        if (combined_diff.modified !=
                modified_result_diff_dict[new_name].modified):
            return False

    return True
129
130
131
def remove_range(file_contents, source_range):
    """
    Removes the chars covered by the SourceRange from the file.

    The given list is not modified; a new list is returned. A line that is
    left as an empty string at the start or the end of the range is dropped.

    :param file_contents: list of lines in the file
    :param source_range:  Source Range
    :return:              list of file contents with the specified chars
                          removed
    """
    if not file_contents:
        return []

    lines = list(file_contents)
    source_range = source_range.expand(file_contents)

    # Line numbers in the SourceRange are human-readable (start at 1),
    # list indices start at 0.
    first = source_range.start.line - 1
    last = source_range.end.line - 1
    head = lines[first][:source_range.start.column - 1]

    if first == last:
        # Everything happens in one line: keep what is in front of and
        # behind the range.
        lines[first] = head + lines[first][source_range.end.column:]
        if lines[first] == "":
            del lines[first]
        return lines

    # Keep the part before the range on the first line and the part after
    # the range on the last line.
    lines[first] = head
    lines[last] = lines[last][source_range.end.column:]

    # Drop all lines strictly between the first and the last line.
    del lines[first + 1:last]

    # Remove leftover empty lines; the former last line now directly follows
    # the first one.
    if lines[first + 1] == "":
        del lines[first + 1]
    if lines[first] == "":
        del lines[first]

    return lines
179
180
181
def remove_result_ranges_diffs(result_list, file_dict):
    """
    Calculates, for every result, the diffs to all files in file_dict that
    describe the removal of that result's affected code.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    diffs_by_result = {}

    for result in result_list:
        # Work on a private copy so that file_dict stays untouched.
        stripped_files = copy.deepcopy(file_dict)

        # The ranges are visited from last to first and overlapping ones are
        # joined. This way, a removal is never offset by an earlier removal
        # in the same line that would invalidate the indices.
        merged_ranges = []
        pending = None
        for current in sorted(result.affected_code, reverse=True):
            if pending is None:
                pending = current
            elif current.overlaps(pending):
                pending = SourceRange.join(pending, current)
            else:
                merged_ranges.append(pending)
                pending = current

        # Flush the last pending range, if there was any range at all.
        if pending:
            merged_ranges.append(pending)

        for merged_range in merged_ranges:
            target = merged_range.file
            stripped_files[target] = remove_range(stripped_files[target],
                                                  merged_range)

        diffs_by_result[result] = {
            file_name: Diff.from_string_arrays(file_dict[file_name],
                                               stripped_files[file_name])
            for file_name in file_dict}

    return diffs_by_result
234
235
236
def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts and detects
    renamed files.

    A file that only exists in ``modified_file_dict`` is considered a renamed
    version of a file that only exists in ``original_file_dict`` if the
    ``SequenceMatcher`` ratio of their joined contents is above 0.5. Every
    original file is used for at most one renaming, and the candidates are
    tried in sorted order so that the outcome is reproducible.

    Both dicts are modified in place:

    * every new file without a renaming source gets an empty list as its
      contents in ``original_file_dict``
    * every file missing from ``modified_file_dict`` (renamed ones included)
      gets an empty list as its contents there

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :return:                   Dict mapping the old name of each renamed file
                               to its new name
    """
    original_files = set(original_file_dict)
    modified_files = set(modified_file_dict)

    # Only files that vanished can be the source of a renaming, only files
    # that appeared can be its target.
    removed_files = sorted(original_files - modified_files)
    added_files = sorted(modified_files - original_files)

    renamed_files_dict = {}
    for new_name in added_files:
        new_content = ''.join(modified_file_dict[new_name])
        for old_name in removed_files:
            # An original file must not be claimed twice; otherwise the
            # earlier new file would lose its mapping without ever getting
            # an entry in original_file_dict.
            if old_name in renamed_files_dict:
                continue

            matcher = SequenceMatcher(
                None,
                new_content,
                ''.join(original_file_dict[old_name]))
            # quick_ratio() is a cheap upper bound for ratio(), so the
            # expensive computation is skipped for clearly different files.
            if matcher.quick_ratio() > 0.5 and matcher.ratio() > 0.5:
                renamed_files_dict[old_name] = new_name
                break
        else:
            # No renaming source found: the file is genuinely new.
            original_file_dict[new_name] = []

    for old_name in removed_files:
        modified_file_dict[old_name] = []

    return renamed_files_dict
266