1
|
|
|
import copy |
2
|
|
|
from difflib import SequenceMatcher |
3
|
|
|
|
4
|
|
|
from coalib.results.Diff import ConflictError, Diff |
5
|
|
|
from coalib.results.SourceRange import SourceRange |
6
|
|
|
|
7
|
|
|
|
8
|
|
|
def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Filters results for such ones that are unique across file changes.

    Note that both file dicts are modified in place by
    ``ensure_files_present``: files that exist on only one side (and are not
    detected as renames) are added to the other dict with empty contents.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old changes.
                               The results are collected while walking
                               ``modified_results`` backwards, so they are
                               returned in reversed order.
    """
    renamed_files = ensure_files_present(original_file_dict,
                                         modified_file_dict)

    # diffs_dict[file] is a diff between the original and modified file;
    # a renamed file is compared against the contents under its new name.
    diffs_dict = {
        file: Diff.from_string_arrays(
            original_file_dict[file],
            modified_file_dict[renamed_files.get(file, file)])
        for file in original_file_dict}

    orig_result_diff_dict_dict = remove_result_ranges_diffs(original_results,
                                                            original_file_dict)

    mod_result_diff_dict_dict = remove_result_ranges_diffs(modified_results,
                                                           modified_file_dict)

    unique_results = []

    for m_r in reversed(modified_results):
        # A modified result is not unique as soon as one original result
        # matches it completely (basic properties and source ranges).
        # ``any`` stops at the first such match.
        has_match = any(
            basics_match(o_r, m_r) and
            source_ranges_match(original_file_dict,
                                diffs_dict,
                                orig_result_diff_dict_dict[o_r],
                                mod_result_diff_dict_dict[m_r],
                                renamed_files)
            for o_r in original_results)

        if not has_match:
            unique_results.append(m_r)

    return unique_results
59
|
|
|
|
60
|
|
|
|
61
|
|
|
def basics_match(original_result,
                 modified_result):
    """
    Checks whether the following properties of two results match:

    * origin
    * message
    * severity
    * debug_msg

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :return:                Boolean value whether or not the properties match
    """
    # Compared attribute by attribute; ``all`` stops at the first mismatch.
    return all(getattr(original_result, member) ==
               getattr(modified_result, member)
               for member in ('origin', 'message', 'severity', 'debug_msg'))
78
|
|
|
|
79
|
|
|
|
80
|
|
|
def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict,
                        renamed_files):
    """
    Checks whether the SourceRanges of two results match.

    For every original file, the file diff and the diff removing the original
    result's affected code are combined. The resulting file must equal the
    modified file with the modified result's affected code removed.

    :param original_file_dict:        Dict of lists of file contents before
                                      changes
    :param diff_dict:                 Dict of diffs describing the changes per
                                      file
    :param original_result_diff_dict: Diff for each file describing the
                                      removal of the original result's
                                      affected code
    :param modified_result_diff_dict: Diff for each file describing the
                                      removal of the modified result's
                                      affected code
    :param renamed_files:             A dictionary containing file renamings
                                      across runs (old name -> new name)
    :return:                          Boolean value whether the SourceRanges
                                      match
    """
    for file_name in original_file_dict:
        try:  # fails if the affected range of the result gets modified
            original_total_diff = (diff_dict[file_name] +
                                   original_result_diff_dict[file_name])
        except ConflictError:
            return False

        # original file with file_diff and original_diff applied
        original_total_file = original_total_diff.modified

        # modified file with modified_diff applied; the modified side is
        # looked up under the new name when the file was renamed
        modified_name = renamed_files.get(file_name, file_name)
        modified_total_file = modified_result_diff_dict[modified_name].modified

        if original_total_file != modified_total_file:
            return False

    return True
111
|
|
|
|
112
|
|
|
|
113
|
|
|
def remove_range(file_contents, source_range):
    """
    Removes the chars covered by the SourceRange from the file.

    The given ``file_contents`` is not modified; a new list is returned.
    Lines that become empty strings at the edges of the removed range are
    dropped from the result.

    :param file_contents: list of lines in the file
    :param source_range:  Source Range
    :return:              list of file contents with the specified chars
                          removed
    """
    if not file_contents:
        return []

    newfile = list(file_contents)

    source_range = source_range.expand(file_contents)

    # attention: line numbers in the SourceRange are human-readable,
    # list indices start with 0
    first = source_range.start.line - 1
    last = source_range.end.line - 1
    # Slice bounds: keep everything before the start column on the first
    # line and everything after the end column on the last line.
    keep_until = source_range.start.column - 1
    keep_from = source_range.end.column

    if first == last:
        # if it's all in one line, replace the line by its beginning and end
        line = newfile[first]
        newfile[first] = line[:keep_until] + line[keep_from:]
        if newfile[first] == "":
            del newfile[first]
    else:
        # cut away after start
        newfile[first] = newfile[first][:keep_until]

        # cut away before end
        newfile[last] = newfile[last][keep_from:]

        # drop every line strictly between the first and the last line
        del newfile[first + 1:last]

        # remove leftover empty lines
        # the line at `first + 1` is now the former last line of the range
        if newfile[first + 1] == "":
            del newfile[first + 1]
        if newfile[first] == "":
            del newfile[first]

    return newfile
161
|
|
|
|
162
|
|
|
|
163
|
|
|
def remove_result_ranges_diffs(result_list, file_dict):
    """
    Calculates the diffs to all files in file_dict that describe the removal of
    each respective result's affected code.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    result_diff_dict_dict = {}
    for original_result in result_list:
        # A shallow copy is enough: remove_range never mutates the list it is
        # given but returns a new one, and entries are only rebound below.
        mod_file_dict = copy.copy(file_dict)

        # gather all source ranges from this result
        source_ranges = []

        # SourceRanges must be sorted backwards and overlaps must be eliminated
        # this way, the deletion based on sourceRanges is not offset by
        # previous deletions in the same line that invalidate the indices.
        previous = None

        for source_range in sorted(original_result.affected_code,
                                   reverse=True):
            if previous is None:
                previous = source_range
            elif source_range.overlaps(previous):
                # previous exists and overlaps: merge both into one range
                previous = SourceRange.join(previous, source_range)
            else:
                # previous exists but it doesn't overlap
                source_ranges.append(previous)
                previous = source_range

        # don't forget last entry if there were any:
        if previous is not None:
            source_ranges.append(previous)

        for source_range in source_ranges:
            file_name = source_range.file
            mod_file_dict[file_name] = remove_range(mod_file_dict[file_name],
                                                    source_range)

        result_diff_dict_dict[original_result] = {
            file_name: Diff.from_string_arrays(file_dict[file_name],
                                               mod_file_dict[file_name])
            for file_name in file_dict}

    return result_diff_dict_dict
216
|
|
|
|
217
|
|
|
|
218
|
|
|
def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts.

    Both dicts are modified in place. A file that only exists in
    ``modified_file_dict`` is treated as a rename of a file that only exists
    in ``original_file_dict`` if their contents are more than 50% similar.
    Each original file is used as a rename source at most once. New files
    without a rename source are added to ``original_file_dict`` with empty
    contents. Files missing from ``modified_file_dict`` (including the old
    names of renamed files) are added to it with empty contents.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :return:                   Return a dictionary of renamed files, mapping
                               the old file name to the new file name.
    """
    original_files = set(original_file_dict)
    modified_files = set(modified_file_dict)
    added_files = modified_files - original_files
    removed_files = original_files - modified_files

    renamed_files_dict = {}
    # Removed files that are not yet the source of a rename. Sorting (here and
    # below) makes the pairing independent of set iteration order.
    rename_candidates = sorted(removed_files)

    for file in sorted(added_files):
        new_contents = ''.join(modified_file_dict[file])
        for comparable_file in rename_candidates:
            matcher = SequenceMatcher(
                None,
                new_contents,
                ''.join(original_file_dict[comparable_file]))
            # real_quick_ratio is a cheap upper bound that lets us skip the
            # expensive ratio() for clearly dissimilar files.
            if matcher.real_quick_ratio() >= 0.5 and matcher.ratio() > 0.5:
                renamed_files_dict[comparable_file] = file
                # Claim the source so that no other new file can overwrite
                # this mapping and end up missing from original_file_dict.
                # Safe while iterating because we break right away.
                rename_candidates.remove(comparable_file)
                break
        else:
            # No rename source found: the file is entirely new.
            original_file_dict[file] = []

    for file in removed_files:
        modified_file_dict[file] = []

    return renamed_files_dict
249
|
|
|
|