ensure_files_present() - Code Metrics - Inspection of "Udayan/res filter" - coala-analyzer/coala - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#2100)

by Udayan

created 2016-05-10 07:21 UTC

ensure_files_present() F

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	3
Bugs	0	Features	0

Metric	Value
cc	9
c	3
b	0
f	0
dl	0
loc	30
rs	3

import copy
from difflib import SequenceMatcher

from coalib.results.Diff import ConflictError, Diff
from coalib.results.SourceRange import SourceRange


def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Returns the results of the new run that have no counterpart in the old run.

    Both file dicts are first normalized by ``ensure_files_present``, which
    may add entries to them.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of those results from the new files for
                               which no result of the old files matches. The
                               list is built while walking
                               ``modified_results`` backwards.
    """
    renamed_files = ensure_files_present(original_file_dict,
                                         modified_file_dict)

    # Per original file: the diff leading to its (possibly renamed)
    # counterpart among the modified files.
    diffs_dict = {
        name: Diff.from_string_arrays(
            contents,
            modified_file_dict[renamed_files.get(name, name)])
        for name, contents in original_file_dict.items()}

    orig_result_diff_dict_dict = remove_result_ranges_diffs(original_results,
                                                            original_file_dict)
    mod_result_diff_dict_dict = remove_result_ranges_diffs(modified_results,
                                                           modified_file_dict)

    unique_results = []
    for new_result in reversed(modified_results):
        # A new result is "known" as soon as one old result agrees with it
        # both in its basic properties and in its source ranges.
        already_known = any(
            basics_match(old_result, new_result)
            and source_ranges_match(original_file_dict,
                                    diffs_dict,
                                    orig_result_diff_dict_dict[old_result],
                                    mod_result_diff_dict_dict[new_result],
                                    renamed_files)
            for old_result in original_results)
        if not already_known:
            unique_results.append(new_result)

    return unique_results


def basics_match(original_result,
                 modified_result):
    """
    Compares the basic properties of two results, namely:
    * origin
    * message
    * severity
    * debug_msg

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :return:                True if all listed properties are equal on both
                            results, False as soon as one of them differs
    """
    for attribute in ('origin', 'message', 'severity', 'debug_msg'):
        old_value = getattr(original_result, attribute)
        new_value = getattr(modified_result, attribute)
        if not old_value == new_value:
            return False

    return True


def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict,
                        renamed_files):
    """
    Decides whether two results cover the same code in every file.

    For each original file, the file diff and the original result's diff are
    combined; the outcome has to equal what the modified result's diff
    produces for the corresponding (possibly renamed) file.

    :param original_file_dict: Dict of lists of file contents before changes
    :param diff_dict:          Dict of diffs describing the changes per file
    :param original_result_diff_dict: diff for each file for the old result
    :param modified_result_diff_dict: diff for each file for the new result
    :param renamed_files:   A dictionary containing file renamings across runs
    :return:                     Boolean value whether the SourceRanges match
    """
    for old_name in original_file_dict:
        # A renamed file is looked up under its new name on the modified side.
        new_name = renamed_files.get(old_name, old_name)

        try:
            # Combining raises ConflictError; per the original comment this
            # happens when the result's affected range itself was modified.
            combined_diff = (diff_dict[old_name] +
                             original_result_diff_dict[old_name])
        except ConflictError:
            return False

        after_original = combined_diff.modified
        after_modified = modified_result_diff_dict[new_name].modified
        if after_original != after_modified:
            return False

    return True


def remove_range(file_contents, source_range):
    """
    Cuts the characters covered by a source range out of a file.

    The input list is left untouched; a new list is returned. Lines that
    become empty strings through the removal are dropped entirely.

    :param file_contents: list of lines in the file
    :param source_range:  Source Range; it is expanded against
                          ``file_contents`` before use
    :return:              list of lines with the covered characters removed,
                          or an empty list if ``file_contents`` is empty
    """
    if not file_contents:
        return []

    expanded = source_range.expand(file_contents)

    # Lines and columns of the range are 1-based, list indices are 0-based.
    first_index = expanded.start.line - 1
    last_index = expanded.end.line - 1

    lines = list(file_contents)
    # What survives: everything before the start column on the first line
    # and everything after the end column on the last line.
    head = lines[first_index][:expanded.start.column - 1]
    tail = lines[last_index][expanded.end.column:]

    if first_index == last_index:
        survivors = [head + tail]
    else:
        survivors = [head, tail]

    # Swap the whole covered line span for the non-empty leftovers.
    lines[first_index:last_index + 1] = [
        piece for piece in survivors if piece != ""]

    return lines


def remove_result_ranges_diffs(result_list, file_dict):
    """
    Builds, for every result, the diffs that describe what removing the
    result's affected code would do to each file in file_dict.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    diffs_per_result = {}

    for result in result_list:
        # Work on a private copy so file_dict itself stays the diff baseline.
        stripped_files = copy.deepcopy(file_dict)

        # Walk the ranges back to front and fold overlapping ones together,
        # so that an earlier removal cannot shift the positions a later
        # removal relies on.
        merged_ranges = []
        pending = None
        for current in sorted(result.affected_code, reverse=True):
            if pending is None:
                pending = current
            elif current.overlaps(pending):
                pending = SourceRange.join(pending, current)
            else:
                merged_ranges.append(pending)
                pending = current
        # Flush the range that is still being held back, if there is one.
        if pending:
            merged_ranges.append(pending)

        for merged in merged_ranges:
            target = merged.file
            stripped_files[target] = remove_range(stripped_files[target],
                                                  merged)

        diffs_per_result[result] = {
            name: Diff.from_string_arrays(contents, stripped_files[name])
            for name, contents in file_dict.items()}

    return diffs_per_result


def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts. Return a
    dictionary of renamed files.

    A file that only exists after the changes is treated as a rename of a
    file that only exists before the changes if their contents are similar
    enough (``SequenceMatcher`` ratio above 0.5). Each original file is used
    for at most one rename, and candidates are visited in sorted order so the
    pairing does not depend on hash ordering.

    Both dicts are modified in place:

    * a new file that is not detected as a rename gets an empty entry in
      ``original_file_dict``;
    * every file that only exists before the changes (renamed or not) gets an
      empty entry in ``modified_file_dict``.

    :param original_file_dict: Dict of lists of file contents before  changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :return:                   Dict mapping the old name of each renamed file
                               to its new name
    """
    original_only = sorted(name for name in original_file_dict
                           if name not in modified_file_dict)
    modified_only = sorted(name for name in modified_file_dict
                           if name not in original_file_dict)

    renamed_files_dict = {}
    for new_name in modified_only:
        new_text = ''.join(modified_file_dict[new_name])
        for old_name in original_only:
            # An original that already has a rename target must not be
            # claimed again: the earlier mapping would be overwritten and
            # that new file would end up in neither dict nor mapping.
            if old_name in renamed_files_dict:
                continue
            matcher = SequenceMatcher(
                None,
                new_text,
                ''.join(original_file_dict[old_name]))
            # real_quick_ratio() is a cheap upper bound that lets clearly
            # dissimilar files skip the expensive ratio() computation.
            if matcher.real_quick_ratio() >= 0.5 and matcher.ratio() > 0.5:
                renamed_files_dict[old_name] = new_name
                break
        else:
            # No rename partner found: the file is genuinely new.
            original_file_dict[new_name] = []

    for old_name in original_only:
        modified_file_dict[old_name] = []

    return renamed_files_dict


1			import copy
2			from difflib import SequenceMatcher
3
4			from coalib.results.Diff import ConflictError, Diff
5			from coalib.results.SourceRange import SourceRange
6
7
8			def filter_results(original_file_dict,
9			modified_file_dict,
10			original_results,
11			modified_results):
12			"""
13			Filters results for such ones that are unique across file changes
14
15			:param original_file_dict: Dict of lists of file contents before changes
16			:param modified_file_dict: Dict of lists of file contents after changes
17			:param original_results: List of results of the old files
18			:param modified_results: List of results of the new files
19			:return: List of results from new files that are unique
20			from all those that existed in the old changes
21			"""
22
23			renamed_files = ensure_files_present(original_file_dict,
24			modified_file_dict)
25			# diffs_dict[file] is a diff between the original and modified file
26			diffs_dict = {}
27			for file in original_file_dict:
28			if file in renamed_files:
29			diffs_dict[file] = Diff.from_string_arrays(
30			original_file_dict[file],
31			modified_file_dict[renamed_files[file]])
32			else:
33			diffs_dict[file] = Diff.from_string_arrays(
34			original_file_dict[file],
35			modified_file_dict[file])
36
37			orig_result_diff_dict_dict = remove_result_ranges_diffs(original_results,
38			original_file_dict)
39
40			mod_result_diff_dict_dict = remove_result_ranges_diffs(modified_results,
41			modified_file_dict)
42
43			unique_results = []
44
45			for m_r in reversed(modified_results):
46			unique = True
47
48			for o_r in original_results:
49
50			if basics_match(o_r, m_r):
51			if source_ranges_match(original_file_dict,
52			diffs_dict,
53			orig_result_diff_dict_dict[o_r],
54			mod_result_diff_dict_dict[m_r],
55			renamed_files):
56
57			# at least one original result matches completely
58			unique = False
59			break
60			if unique:
61			unique_results.append(m_r)
62
63			return unique_results
64
65
66			def basics_match(original_result,
67			modified_result):
68			"""
69			Checks whether the following properties of two results match:
70			* origin
71			* message
72			* severity
73			* debug_msg
74
75			:param original_result: A result of the old files
76			:param modified_result: A result of the new files
77			:return: Boolean value whether or not the properties match
78			"""
79
80			return all(getattr(original_result, member) ==
81			getattr(modified_result, member)
82			for member in ['origin', 'message', 'severity', 'debug_msg'])
83
84
85			def source_ranges_match(original_file_dict,
86			diff_dict,
87			original_result_diff_dict,
88			modified_result_diff_dict,
89			renamed_files):
90			"""
91			Checks whether the SourceRanges of two results match
92
93			:param original_file_dict: Dict of lists of file contents before changes
94			:param diff_dict: Dict of diffs describing the changes per file
95			:param original_result_diff_dict: diff for each file for this result
96			:param modified_result_diff_dict: guess
97			:param renamed_files: A dictionary containing file renamings across runs
98			:return: Boolean value whether the SourceRanges match
99			"""
100			for file_name in original_file_dict:
101			if file_name in renamed_files:
102			mod_file_name = renamed_files[file_name]
103			else:
104			mod_file_name = file_name
105
106			try: # fails if the affected range of the result get's modified
107			original_total_diff = (diff_dict[file_name] +
108			original_result_diff_dict[file_name])
109			except ConflictError:
110			return False
111
112			# original file with file_diff and original_diff applied
113			original_total_file = original_total_diff.modified
114			# modified file with modified_diff applied
115			modified_total_file = modified_result_diff_dict[mod_file_name].modified
116			if original_total_file != modified_total_file:
117			return False
118			return True
119
120
121			def remove_range(file_contents, source_range):
122			"""
123			removes the chars covered by the sourceRange from the file
124
125			:param file_contents: list of lines in the file
126			:param source_range: Source Range
127			:return: list of file contents without specified chars removed
128			"""
129			if not file_contents:
130			return []
131
132			newfile = list(file_contents)
133			# attention: line numbers in the SourceRange are human-readable,
134			# list indices start with 0
135
136			source_range = source_range.expand(file_contents)
137
138			if source_range.start.line == source_range.end.line:
139			# if it's all in one line, replace the line by it's beginning and end
140			newfile[source_range.start.line - 1] = (
141			newfile[source_range.start.line - 1][:source_range.start.column-1]
142			+ newfile[source_range.start.line - 1][source_range.end.column:])
143			if newfile[source_range.start.line - 1] == "":
144			del newfile[source_range.start.line - 1]
145			else:
146			# cut away after start
147			newfile[source_range.start.line - 1] = (
148			newfile[source_range.start.line - 1][:source_range.start.column-1])
149
150			# cut away before end
151			newfile[source_range.end.line - 1] = (
152			newfile[source_range.end.line - 1][source_range.end.column:])
153
154			# start: index = first line number ==> line after first line
155			# end: index = last line -2 ==> line before last line
156
157			for i in reversed(range(
158			source_range.start.line, source_range.end.line - 1)):
159			del newfile[i]
160
161			# remove leftover empty lines
162			# the first line here is actually the former `source_range.end.line -1`
163			if newfile[source_range.start.line] == "":
164			del newfile[source_range.start.line]
165			if newfile[source_range.start.line - 1] == "":
166			del newfile[source_range.start.line - 1]
167
168			return newfile
169
170
171			def remove_result_ranges_diffs(result_list, file_dict):
172			"""
173			Calculates the diffs to all files in file_dict that describe the removal of
174			each respective result's affected code.
175
176			:param result_list: list of results
177			:param file_dict: dict of file contents
178			:return: returnvalue[result][file] is a diff of the changes the
179			removal of this result's affected code would cause for
180			the file.
181			"""
182			result_diff_dict_dict = {}
183			for original_result in result_list:
184			mod_file_dict = copy.deepcopy(file_dict)
185
186			# gather all source ranges from this result
187			source_ranges = []
188
189			# SourceRanges must be sorted backwards and overlaps must be eliminated
190			# this way, the deletion based on sourceRanges is not offset by
191			# previous deletions in the same line that invalidate the indices.
192			previous = None
193
194			for source_range in sorted(original_result.affected_code, reverse=True):
195			# previous exists and overlaps
196			if previous is not None and source_range.overlaps(previous):
197			combined_sr = SourceRange.join(previous, source_range)
198			previous = combined_sr
199			elif previous is None:
200			previous = source_range
201			# previous exists but it doesn't overlap
202			else:
203			source_ranges.append(previous)
204			previous = source_range
205			# don't forget last entry if there were any:
206			if previous:
207			source_ranges.append(previous)
208
209			for source_range in source_ranges:
210			file_name = source_range.file
211			new_file = remove_range(mod_file_dict[file_name],
212			source_range)
213			mod_file_dict[file_name] = new_file
214
215			diff_dict = {}
216			for file_name in file_dict:
217			diff_dict[file_name] = Diff.from_string_arrays(
218			file_dict[file_name],
219			mod_file_dict[file_name])
220
221			result_diff_dict_dict[original_result] = diff_dict
222
223			return result_diff_dict_dict
224
225
226			def ensure_files_present(original_file_dict, modified_file_dict):
227			"""
228			Ensures that all files are available as keys in both dicts. Return a
229			dictionary of renamed files.
230
231			:param original_file_dict: Dict of lists of file contents before changes
232			:param modified_file_dict: Dict of lists of file contents after changes
233			"""
234			affected_files = set(original_file_dict.keys()).union(
235			set(modified_file_dict.keys()))
236			original_unique_files = affected_files - set(modified_file_dict.keys())
237			renamed_files_dict = {}
238			for file in filter(
239			lambda filter_file: filter_file not in original_file_dict,
240			affected_files):
241			for comparable_file in original_unique_files:
242			s = SequenceMatcher(
243			None,
244			''.join(modified_file_dict[file]),
245			''.join(original_file_dict[comparable_file]))
246			if s.real_quick_ratio() >= 0.5 and s.ratio() > 0.5:
247			renamed_files_dict[comparable_file] = file
248			break
249			else:
250			original_file_dict[file] = []
251			for file in filter(
252			lambda filter_file: filter_file not in modified_file_dict,
253			affected_files):
254			modified_file_dict[file] = []
			0 ignored issues – show Coding Style introduced 2016-05-10 07:23 UTC by Report Bug Copy Issue Report The indentation here looks off. 8 spaces were expected, but 12 were found. Loading history...
255			return renamed_files_dict
256

coala-analyzer / coala

Pull Request — master (#2100)

ensure_files_present() F

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like