Completed
Pull Request — master (#1962)
by Udayan
01:37
created

coalib.results.ensure_files_present()   A

Complexity

Conditions 4

Size

Total Lines 14

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 14
rs 9.2
1
import copy
2
3
from coalib.results.Diff import ConflictError, Diff
4
from coalib.results.SourceRange import SourceRange
5
6
7
def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Filters results for such ones that are unique across file changes

    Note that both file dicts are completed in place: any file that is a key
    in only one of them is added to the other one with an empty list as its
    contents (see ``ensure_files_present``).

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old changes.
                               The list is built while walking
                               ``modified_results`` backwards, so its order is
                               reversed relative to the input.
    """
    ensure_files_present(original_file_dict, modified_file_dict)

    # diffs_dict[file_name] is a diff between the original and modified file
    diffs_dict = {
        file_name: Diff.from_string_arrays(original_file_dict[file_name],
                                           modified_file_dict[file_name])
        for file_name in original_file_dict}

    # For every result: the per-file diffs that describe the removal of the
    # code this result points at.
    orig_result_diff_dict_dict = remove_result_ranges_diffs(
        original_results, original_file_dict)
    mod_result_diff_dict_dict = remove_result_ranges_diffs(
        modified_results, modified_file_dict)

    unique_results = []

    for m_r in reversed(modified_results):
        # A modified result is not unique as soon as one original result
        # matches it completely (basic properties and source ranges).
        has_counterpart = any(
            basics_match(o_r, m_r) and
            source_ranges_match(original_file_dict,
                                diffs_dict,
                                orig_result_diff_dict_dict[o_r],
                                mod_result_diff_dict_dict[m_r])
            for o_r in original_results)

        if not has_counterpart:
            unique_results.append(m_r)

    return unique_results
55
56
57
def basics_match(original_result,
                 modified_result):
    """
    Checks whether the following properties of two results match:

    * origin
    * message
    * severity
    * debug_msg

    Any other attribute of the results is ignored by this check.

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :return:                Boolean value whether or not the properties match
    """
    compared_members = ('origin', 'message', 'severity', 'debug_msg')

    return all(getattr(original_result, member) ==
               getattr(modified_result, member)
               for member in compared_members)
74
75
76
def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict):
    """
    Checks whether the SourceRanges of two results match

    For every file, the change between the old and the new file is combined
    with the removal of the original result's affected code. The outcome has
    to equal the new file with the modified result's affected code removed.

    :param original_file_dict: Dict of lists of file contents before changes
    :param diff_dict:          Dict of diffs describing the changes per file
    :param original_result_diff_dict: diff for each file for this result
    :param modified_result_diff_dict: diff for each file for the modified
                                      result
    :return:                     Boolean value whether the SourceRanges match
    """
    for file_name in original_file_dict:

        try:  # fails if the affected range of the result gets modified
            original_total_diff = (diff_dict[file_name] +
                                   original_result_diff_dict[file_name])
        except ConflictError:
            return False

        # original file with file_diff and original_diff applied
        original_total_file = original_total_diff.modified
        # modified file with modified_diff applied
        modified_total_file = modified_result_diff_dict[file_name].modified

        if original_total_file != modified_total_file:
            return False

    return True
104
105
106
def remove_range(file_contents, source_range):
    """
    Removes the chars covered by the source range from the file.

    The input is not modified; a new list is returned. A line that is left as
    an empty string by the removal is dropped from the result. When the range
    spans several lines, the remainders of its first and last line are kept
    as separate lines (they are not joined).

    :param file_contents: list of lines in the file
    :param source_range:  Source Range; it is normalised through its
                          ``expand`` method before use. Lines and columns are
                          treated as 1-based and the end column as inclusive.
    :return:              list of file contents with the specified chars
                          removed
    """
    if not file_contents:
        return []

    newfile = list(file_contents)
    source_range = source_range.expand(file_contents)

    # attention: line numbers in the SourceRange are human-readable,
    # list indices start with 0
    first = source_range.start.line - 1
    last = source_range.end.line - 1

    # what survives of the first line (before the range) and of the last
    # line (after the range)
    kept_head = newfile[first][:source_range.start.column - 1]
    kept_tail = newfile[last][source_range.end.column:]

    if first == last:
        # if it's all in one line, replace the line by its beginning and end
        newfile[first] = kept_head + kept_tail
        if newfile[first] == "":
            del newfile[first]
    else:
        newfile[first] = kept_head
        newfile[last] = kept_tail

        # drop every line strictly between the first and the last one
        del newfile[first + 1:last]

        # remove leftover empty lines; the former last line now directly
        # follows the first one
        if newfile[first + 1] == "":
            del newfile[first + 1]
        if newfile[first] == "":
            del newfile[first]

    return newfile
154
155
156
def remove_result_ranges_diffs(result_list, file_dict):
    """
    Calculates the diffs to all files in file_dict that describe the removal of
    each respective result's affected code.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    result_diff_dict_dict = {}

    for result in result_list:
        # work on a copy so that file_dict stays untouched for the next result
        mod_file_dict = copy.deepcopy(file_dict)

        # gather all source ranges from this result
        source_ranges = []

        # SourceRanges must be sorted backwards and overlaps must be eliminated
        # this way, the deletion based on sourceRanges is not offset by
        # previous deletions in the same line that invalidate the indices.
        previous = None

        for source_range in sorted(result.affected_code, reverse=True):
            if previous is None:
                previous = source_range
            elif source_range.overlaps(previous):
                # previous exists and overlaps: merge both into one range
                previous = SourceRange.join(previous, source_range)
            else:
                # previous exists but it doesn't overlap
                source_ranges.append(previous)
                previous = source_range

        # don't forget last entry if there were any; compared against None
        # (like above) so the check does not depend on SourceRange truthiness
        if previous is not None:
            source_ranges.append(previous)

        for source_range in source_ranges:
            file_name = source_range.file
            mod_file_dict[file_name] = remove_range(mod_file_dict[file_name],
                                                    source_range)

        result_diff_dict_dict[result] = {
            file_name: Diff.from_string_arrays(file_dict[file_name],
                                               mod_file_dict[file_name])
            for file_name in file_dict}

    return result_diff_dict_dict
209
210
211
def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts.

    Both dicts are modified in place: a file that is a key in only one of them
    is added to the other one with an empty list as its contents. Existing
    entries are left untouched.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    """
    # files that only exist after the changes
    for file_name in modified_file_dict:
        original_file_dict.setdefault(file_name, [])

    # files that only exist before the changes
    for file_name in original_file_dict:
        modified_file_dict.setdefault(file_name, [])
225