1
|
|
|
import copy |
2
|
|
|
|
3
|
|
|
from coalib.results.Diff import ConflictError, Diff |
4
|
|
|
from coalib.results.SourceRange import SourceRange |
5
|
|
|
|
6
|
|
|
|
7
|
|
|
def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Picks those results of the modified files that have no counterpart
    among the results of the original files.

    A modified result is considered already known as soon as one original
    result agrees with it both on the basic properties (``basics_match``)
    and on the affected source ranges (``source_ranges_match``).

    Both file dicts are completed in place through ``ensure_files_present``
    before any comparison takes place.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old changes,
                               in reversed order of ``modified_results``
    """
    ensure_files_present(original_file_dict, modified_file_dict)

    # file_diffs[name] is the diff between the original and modified file
    file_diffs = {
        name: Diff.from_string_arrays(original_file_dict[name],
                                      modified_file_dict[name])
        for name in original_file_dict}

    # For every result: the per-file diffs that remove its affected code.
    removal_diffs_orig = remove_result_ranges_diffs(original_results,
                                                    original_file_dict)
    removal_diffs_mod = remove_result_ranges_diffs(modified_results,
                                                   modified_file_dict)

    def already_known(new_result):
        # True as soon as one original result matches completely; the
        # source range comparison only runs when the basics agree.
        return any(
            basics_match(old_result, new_result) and
            source_ranges_match(original_file_dict,
                                file_diffs,
                                removal_diffs_orig[old_result],
                                removal_diffs_mod[new_result])
            for old_result in original_results)

    return [new_result
            for new_result in reversed(modified_results)
            if not already_known(new_result)]
55
|
|
|
|
56
|
|
|
|
57
|
|
|
def basics_match(original_result,
                 modified_result):
    """
    Compares two results attribute by attribute. The attributes taken
    into account are:

    * origin
    * message
    * severity
    * debug_msg

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :return:                True if all listed attributes compare equal,
                            False otherwise
    """
    for attribute in ('origin', 'message', 'severity', 'debug_msg'):
        # Stop at the first attribute that differs.
        if not (getattr(original_result, attribute) ==
                getattr(modified_result, attribute)):
            return False

    return True
74
|
|
|
|
75
|
|
|
|
76
|
|
|
def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict):
    """
    Checks whether the SourceRanges of two results match.

    For every file the diff of the file changes is combined with the diff
    belonging to the original result. The outcome has to be equal to what
    the diff belonging to the modified result produces.

    :param original_file_dict:        Dict of lists of file contents before
                                      changes
    :param diff_dict:                 Dict of diffs describing the changes per
                                      file
    :param original_result_diff_dict: diff for each file for the original
                                      result
    :param modified_result_diff_dict: diff for each file for the modified
                                      result
    :return:                          Boolean value whether the SourceRanges
                                      match
    """
    for file_name in original_file_dict:
        try:
            # Adding the diffs fails with a ConflictError if the range
            # affected by the result gets modified by the file changes.
            combined_diff = (diff_dict[file_name] +
                             original_result_diff_dict[file_name])
        except ConflictError:
            return False

        # Left:  original file with file diff and original result diff applied
        # Right: modified file with modified result diff applied
        if (combined_diff.modified !=
                modified_result_diff_dict[file_name].modified):
            return False

    return True
104
|
|
|
|
105
|
|
|
|
106
|
|
|
def remove_range(file_contents, source_range):
    """
    Produces a copy of the file from which the chars covered by the
    source range have been cut out. Lines that end up as empty strings at
    the borders of the range are dropped. The given list is left untouched.

    :param file_contents: list of lines in the file
    :param source_range:  Source Range
    :return:              list of file contents with the specified chars
                          removed (an empty list for empty file contents)
    """
    if not file_contents:
        return []

    remaining = list(file_contents)

    source_range = source_range.expand(file_contents)

    # Line numbers in the SourceRange are human-readable (counted from 1),
    # list indices start with 0.
    first = source_range.start.line - 1
    last = source_range.end.line - 1

    if first == last:
        # Everything happens in one line: keep what is in front of and
        # behind the range.
        line = remaining[first]
        shortened = (line[:source_range.start.column - 1] +
                     line[source_range.end.column:])
        if shortened == "":
            del remaining[first]
        else:
            remaining[first] = shortened
    else:
        # keep only the part in front of the range in the first line
        remaining[first] = remaining[first][:source_range.start.column - 1]

        # keep only the part behind the range in the last line
        remaining[last] = remaining[last][source_range.end.column:]

        # Drop the lines strictly between first and last line, walking
        # backwards so the indices still to be visited stay valid.
        for index in range(last - 1, first, -1):
            del remaining[index]

        # Remove leftover empty lines; the former last line now directly
        # follows the first one.
        if remaining[first + 1] == "":
            del remaining[first + 1]
        if remaining[first] == "":
            del remaining[first]

    return remaining
154
|
|
|
|
155
|
|
|
|
156
|
|
|
def remove_result_ranges_diffs(result_list, file_dict):
    """
    Computes, for each result, the diffs to all files in file_dict that
    describe what removing the code affected by that result would change.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    result_diff_dict_dict = {}

    for result in result_list:
        # Work on a private copy so file_dict itself stays as it is.
        stripped_files = copy.deepcopy(file_dict)

        # The SourceRanges are walked through backwards and overlapping
        # ones are joined. This way a deletion is never offset by an
        # earlier deletion that would invalidate its indices.
        merged_ranges = []
        pending = None

        for current in sorted(result.affected_code, reverse=True):
            if pending is None:
                # very first range
                pending = current
            elif current.overlaps(pending):
                # grow the pending range instead of storing both
                pending = SourceRange.join(pending, current)
            else:
                # no overlap: the pending range is final
                merged_ranges.append(pending)
                pending = current

        # the last pending range still has to be stored, if there was any
        if pending:
            merged_ranges.append(pending)

        for merged in merged_ranges:
            target = merged.file
            stripped_files[target] = remove_range(stripped_files[target],
                                                  merged)

        result_diff_dict_dict[result] = {
            file_name: Diff.from_string_arrays(file_dict[file_name],
                                               stripped_files[file_name])
            for file_name in file_dict}

    return result_diff_dict_dict
209
|
|
|
|
210
|
|
|
|
211
|
|
|
def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Makes sure every file is available as a key in both dicts. A file
    known to only one of the dicts is added to the other one with an
    empty list as contents. Both dicts are changed in place.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    """
    original_names = set(original_file_dict)
    modified_names = set(modified_file_dict)

    # files that only exist after the changes
    for name in modified_names - original_names:
        original_file_dict[name] = []

    # files that only exist before the changes
    for name in original_names - modified_names:
        modified_file_dict[name] = []
225
|
|
|
|