ensure_files_present()   F
last analyzed

Complexity

Conditions 9

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
dl 0
loc 31
rs 3
c 0
b 0
f 0
1
import copy
2
from difflib import SequenceMatcher
3
4
from coalib.results.Diff import ConflictError, Diff
5
from coalib.results.SourceRange import SourceRange
6
7
8
def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Filters results for such ones that are unique across file changes

    Note that both file dicts are passed to ``ensure_files_present`` and may
    therefore gain empty entries as a side effect of this call.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old changes.
                               The list is built while walking
                               ``modified_results`` back to front.
    """
    renamed_files = ensure_files_present(original_file_dict,
                                         modified_file_dict)

    # diffs_dict[file] is a diff between the original and modified file; a
    # renamed file is compared against the contents stored under its new name
    diffs_dict = {
        file_name: Diff.from_string_arrays(
            contents,
            modified_file_dict[renamed_files.get(file_name, file_name)])
        for file_name, contents in original_file_dict.items()}

    orig_result_diff_dict_dict = remove_result_ranges_diffs(original_results,
                                                            original_file_dict)

    mod_result_diff_dict_dict = remove_result_ranges_diffs(modified_results,
                                                           modified_file_dict)

    unique_results = []

    for m_r in reversed(modified_results):
        # A new result is a duplicate as soon as one original result matches
        # it completely; any() stops at the first such match and the cheap
        # attribute comparison runs before the diff based one.
        already_known = any(
            basics_match(o_r, m_r) and
            source_ranges_match(original_file_dict,
                                diffs_dict,
                                orig_result_diff_dict_dict[o_r],
                                mod_result_diff_dict_dict[m_r],
                                renamed_files)
            for o_r in original_results)

        if not already_known:
            unique_results.append(m_r)

    return unique_results
59
60
61
def basics_match(original_result,
                 modified_result):
    """
    Checks whether the following properties of two results match:
    * origin
    * message
    * severity
    * debug_msg

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :return:                Boolean value whether or not the properties match
    """
    compared_members = ('origin', 'message', 'severity', 'debug_msg')

    # all() stops at the first member that differs
    return all(getattr(original_result, member) ==
               getattr(modified_result, member)
               for member in compared_members)
78
79
80
def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict,
                        renamed_files):
    """
    Checks whether the SourceRanges of two results match

    For every file of ``original_file_dict`` the file diff and the diff
    removing the original result's code are combined. The outcome has to be
    equal to the modified file with the modified result's code removed.

    :param original_file_dict: Dict of lists of file contents before changes
    :param diff_dict:          Dict of diffs describing the changes per file
    :param original_result_diff_dict: diff for each file for the original
                                      result, keyed by original file name
    :param modified_result_diff_dict: diff for each file for the modified
                                      result; looked up by the new file name
                                      if the file is listed in renamed_files
    :param renamed_files:   A dictionary containing file renamings across runs
    :return:                     Boolean value whether the SourceRanges match
    """
    for file_name in original_file_dict:

        try:  # fails if the affected range of the result gets modified
            original_total_diff = (diff_dict[file_name] +
                                   original_result_diff_dict[file_name])
        except ConflictError:
            return False

        # original file with file_diff and original_diff applied
        original_total_file = original_total_diff.modified
        # modified file with modified_diff applied
        modified_total_file = modified_result_diff_dict[
            renamed_files.get(file_name, file_name)].modified
        if original_total_file != modified_total_file:
            return False
    return True
111
112
113
def remove_range(file_contents, source_range):
    """
    removes the chars covered by the sourceRange from the file

    The passed list is not modified; a new list is returned. Lines that end
    up as empty strings at the borders of the removed range are dropped.

    :param file_contents: list of lines in the file
    :param source_range:  Source Range
    :return:              list of file contents with the specified chars
                          removed
    """
    if not file_contents:
        return []

    newfile = list(file_contents)
    source_range = source_range.expand(file_contents)

    # attention: line numbers in the SourceRange are human-readable,
    # list indices start with 0
    first = source_range.start.line - 1
    last = source_range.end.line - 1

    # what survives: everything before the start column on the first line
    # and everything after the end column on the last line
    kept_head = newfile[first][:source_range.start.column - 1]
    kept_tail = newfile[last][source_range.end.column:]

    if first == last:
        # if it's all in one line, replace the line by its beginning and end
        newfile[first] = kept_head + kept_tail
        if newfile[first] == "":
            del newfile[first]
    else:
        newfile[first] = kept_head
        newfile[last] = kept_tail

        # drop every line strictly between the first and the last line
        del newfile[first + 1:last]

        # remove leftover empty lines
        # the former last line now sits directly behind the first one
        if newfile[first + 1] == "":
            del newfile[first + 1]
        if newfile[first] == "":
            del newfile[first]

    return newfile
161
162
163
def remove_result_ranges_diffs(result_list, file_dict):
    """
    Calculates the diffs to all files in file_dict that describe the removal of
    each respective result's affected code.

    :param result_list: list of results; each result is used as a dictionary
                        key in the return value
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    result_diff_dict_dict = {}
    for result in result_list:
        # work on a private copy so file_dict itself stays untouched
        mod_file_dict = copy.deepcopy(file_dict)

        # gather all source ranges from this result
        source_ranges = []

        # SourceRanges must be sorted backwards and overlaps must be eliminated
        # this way, the deletion based on sourceRanges is not offset by
        # previous deletions in the same line that invalidate the indices.
        previous = None

        for source_range in sorted(result.affected_code, reverse=True):
            if previous is None:
                previous = source_range
            elif source_range.overlaps(previous):
                # merge overlapping ranges into one
                previous = SourceRange.join(previous, source_range)
            else:
                # previous exists but it doesn't overlap
                source_ranges.append(previous)
                previous = source_range

        # don't forget last entry if there were any; compare against None
        # explicitly so the check does not depend on the range's truthiness
        if previous is not None:
            source_ranges.append(previous)

        for source_range in source_ranges:
            file_name = source_range.file
            mod_file_dict[file_name] = remove_range(mod_file_dict[file_name],
                                                    source_range)

        result_diff_dict_dict[result] = {
            file_name: Diff.from_string_arrays(file_dict[file_name],
                                               mod_file_dict[file_name])
            for file_name in file_dict}

    return result_diff_dict_dict
216
217
218
def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts.

    Both dicts are modified in place:

    * a file only present in ``modified_file_dict`` whose content is similar
      enough to a file only present in ``original_file_dict`` is recorded as
      a renaming of that file. Each original file can be claimed by one new
      file only.
    * any other file only present in ``modified_file_dict`` is added to
      ``original_file_dict`` with empty contents.
    * every file only present in ``original_file_dict`` (renamed or not) is
      added to ``modified_file_dict`` with empty contents.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :return:                   Return a dictionary of renamed files, mapping
                               the old file name to the new file name.
    """
    # Take snapshots in dict order before anything is mutated, this keeps the
    # rename detection independent of set iteration order.
    added_files = [name for name in modified_file_dict
                   if name not in original_file_dict]
    removed_files = [name for name in original_file_dict
                     if name not in modified_file_dict]

    renamed_files_dict = {}
    # original files that have not been identified as renamed yet
    rename_candidates = list(removed_files)

    for added_file in added_files:
        added_text = ''.join(modified_file_dict[added_file])

        for candidate in rename_candidates:
            matcher = SequenceMatcher(
                None,
                added_text,
                ''.join(original_file_dict[candidate]))
            # real_quick_ratio() is a cheap upper bound, so the expensive
            # ratio() is only computed for promising pairs
            if matcher.real_quick_ratio() >= 0.5 and matcher.ratio() > 0.5:
                renamed_files_dict[candidate] = added_file
                # safe: iteration over the list stops right after
                rename_candidates.remove(candidate)
                break
        else:
            # no original file resembles it, so it is a new file
            original_file_dict[added_file] = []

    for removed_file in removed_files:
        modified_file_dict[removed_file] = []

    return renamed_files_dict
249