ensure_files_present() - Code Metrics - Inspection of "Udayan/res filter" - coala-analyzer/coala - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#2100)

by Udayan

created 2016-05-01 19:32 UTC

ensure_files_present() D

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
cc	8
dl	0
loc	30
rs	4
c	2
b	0
f	0

import copy
from difflib import SequenceMatcher

from coalib.results.Diff import ConflictError, Diff
from coalib.results.SourceRange import SourceRange


def filter_results(original_file_dict,
                   modified_file_dict,
                   original_results,
                   modified_results):
    """
    Picks those results of the modified files that have no counterpart among
    the results of the original files.

    Both file dicts are passed through ``ensure_files_present`` first, so
    they may get additional (empty) entries as a side effect. The returned
    list is built while walking ``modified_results`` backwards.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :param original_results:   List of results of the old files
    :param modified_results:   List of results of the new files
    :return:                   List of results from new files that are unique
                               from all those that existed in the old changes
    """
    renamed_files = ensure_files_present(original_file_dict,
                                         modified_file_dict)

    # diffs_dict[name] is the diff from the original file to its modified
    # version; a renamed file is compared against its new name.
    diffs_dict = {
        name: Diff.from_string_arrays(
            original_file_dict[name],
            modified_file_dict[renamed_files.get(name, name)])
        for name in original_file_dict}

    old_removal_diffs = remove_result_ranges_diffs(original_results,
                                                   original_file_dict)
    new_removal_diffs = remove_result_ranges_diffs(modified_results,
                                                   modified_file_dict)

    # A modified result is dropped as soon as one original result agrees
    # with it both in its basic properties and in its source ranges.
    return [
        new_result
        for new_result in reversed(modified_results)
        if not any(
            basics_match(old_result, new_result, renamed_files) and
            source_ranges_match(original_file_dict,
                                diffs_dict,
                                old_removal_diffs[old_result],
                                new_removal_diffs[new_result],
                                renamed_files)
            for old_result in original_results)]


def basics_match(original_result,
                 modified_result,
                 renamed_files):
    """
    Compares the basic properties of two results. The properties looked at
    are, in this order:
    * origin
    * message
    * severity
    * debug_msg

    :param original_result: A result of the old files
    :param modified_result: A result of the new files
    :param renamed_files:   A dictionary containing file renamings across runs
                            (not consulted by this check)
    :return:                Boolean value whether or not the properties match
    """
    for attribute in ('origin', 'message', 'severity', 'debug_msg'):
        old_value = getattr(original_result, attribute)
        new_value = getattr(modified_result, attribute)
        # Stop at the first property that differs.
        if not old_value == new_value:
            return False
    return True



def source_ranges_match(original_file_dict,
                        diff_dict,
                        original_result_diff_dict,
                        modified_result_diff_dict,
                        renamed_files):
    """
    Decides whether two results cover the same code, by comparing what every
    file looks like once the code affected by each result has been removed.

    :param original_file_dict: Dict of lists of file contents before changes
    :param diff_dict:          Dict of diffs describing the changes per file
    :param original_result_diff_dict: per file, the diff that removes the
                                      original result's affected code
    :param modified_result_diff_dict: per file, the diff that removes the
                                      modified result's affected code
    :param renamed_files:   A dictionary containing file renamings across runs
    :return:                Boolean value whether the SourceRanges match
    """
    for old_name in original_file_dict:
        # A renamed file is looked up under its new name on the modified side.
        new_name = renamed_files.get(old_name, old_name)

        try:
            # Combining the diffs conflicts when the file change touches the
            # range affected by the original result.
            combined_diff = (diff_dict[old_name] +
                             original_result_diff_dict[old_name])
        except ConflictError:
            return False

        # Left: original file with the file change and the removal applied.
        # Right: modified file with its own removal applied.
        if combined_diff.modified != modified_result_diff_dict[new_name].modified:
            return False

    return True


def remove_range(file_contents, source_range):
    """
    Cuts the characters covered by a SourceRange out of a file.

    Lines that are left as the empty string at the edges of the cut are
    dropped. The list passed in is not modified; a new list is returned.

    :param file_contents: list of lines in the file
    :param source_range:  SourceRange describing the chars to remove
    :return:              new list of lines without the specified chars
    """
    if not file_contents:
        return []

    lines = list(file_contents)
    source_range = source_range.expand(file_contents)

    # Lines and columns in the SourceRange are counted from 1, list and
    # string indices from 0.
    first = source_range.start.line - 1
    last = source_range.end.line - 1
    head_end = source_range.start.column - 1
    tail_start = source_range.end.column

    if first == last:
        # Everything happens in one line: keep what is before and after.
        line = lines[first]
        lines[first] = line[:head_end] + line[tail_start:]
        if lines[first] == "":
            del lines[first]
        return lines

    # Keep the part of the first line before the range ...
    lines[first] = lines[first][:head_end]
    # ... and the part of the last line behind it.
    lines[last] = lines[last][tail_start:]

    # Drop all lines strictly in between, back to front so that the
    # remaining indices stay valid.
    for index in range(last - 1, first, -1):
        del lines[index]

    # The remainder of the former last line now sits right after the first
    # line; remove either remainder if nothing is left of it.
    if lines[first + 1] == "":
        del lines[first + 1]
    if lines[first] == "":
        del lines[first]

    return lines


def remove_result_ranges_diffs(result_list, file_dict):
    """
    Builds, for every result, the diffs that describe what removing the
    result's affected code would do to each file in file_dict.

    :param result_list: list of results
    :param file_dict:   dict of file contents
    :return:            returnvalue[result][file] is a diff of the changes the
                        removal of this result's affected code would cause for
                        the file.
    """
    diffs_per_result = {}

    for result in result_list:
        # Work on a private copy so file_dict stays the pristine reference.
        stripped_files = copy.deepcopy(file_dict)

        # Walk the ranges from back to front and fold overlapping ones
        # together, so that removing one range never shifts the indices of a
        # range that is still to be removed.
        merged_ranges = []
        pending = None
        for current in sorted(result.affected_code, reverse=True):
            if pending is None:
                pending = current
            elif current.overlaps(pending):
                pending = SourceRange.join(pending, current)
            else:
                merged_ranges.append(pending)
                pending = current
        # Flush the range that is still being collected, if any.
        if pending:
            merged_ranges.append(pending)

        for merged in merged_ranges:
            target = merged.file
            stripped_files[target] = remove_range(stripped_files[target],
                                                  merged)

        diffs_per_result[result] = {
            name: Diff.from_string_arrays(file_dict[name],
                                          stripped_files[name])
            for name in file_dict}

    return diffs_per_result


def ensure_files_present(original_file_dict, modified_file_dict):
    """
    Ensures that all files are available as keys in both dicts. Return a
    dictionary of renamed files.

    A file that only exists in modified_file_dict is treated as a renamed
    version of a file that only exists in original_file_dict when their
    contents are more than 50% similar. Every original file can be the source
    of at most one rename; a new file that finds no unclaimed counterpart
    gets an empty entry in original_file_dict. Every file missing from
    modified_file_dict (including the old names of renamed files) gets an
    empty entry there. Both dicts are modified in place.

    :param original_file_dict: Dict of lists of file contents before changes
    :param modified_file_dict: Dict of lists of file contents after changes
    :return:                   Dict mapping the old name of each renamed file
                               to its new name
    """
    # Snapshot both groups before touching the dicts; going through the
    # dicts (rather than a set) keeps the matching order reproducible.
    removed_files = [name for name in original_file_dict
                     if name not in modified_file_dict]
    added_files = [name for name in modified_file_dict
                   if name not in original_file_dict]

    renamed_files_dict = {}
    for added in added_files:
        added_text = ''.join(modified_file_dict[added])
        for candidate in removed_files:
            if candidate in renamed_files_dict:
                # Already the source of another rename; claiming it again
                # would overwrite that mapping and leave the other new file
                # without any entry in original_file_dict.
                continue
            matcher = SequenceMatcher(
                None,
                added_text,
                ''.join(original_file_dict[candidate]))
            # quick_ratio() is a cheap upper bound on ratio(), so it filters
            # out hopeless candidates before the expensive comparison.
            if matcher.quick_ratio() > 0.5 and matcher.ratio() > 0.5:
                renamed_files_dict[candidate] = added
                break
        else:
            # No counterpart found: the file is genuinely new.
            original_file_dict[added] = []

    for removed in removed_files:
        modified_file_dict[removed] = []

    return renamed_files_dict


1			import copy
2			from difflib import SequenceMatcher
3
4			from coalib.results.Diff import ConflictError, Diff
5			from coalib.results.SourceRange import SourceRange
6
7
8			def filter_results(original_file_dict,
9			modified_file_dict,
10			original_results,
11			modified_results):
12			"""
13			Filters results for such ones that are unique across file changes
14
15			:param original_file_dict: Dict of lists of file contents before changes
16			:param modified_file_dict: Dict of lists of file contents after changes
17			:param original_results: List of results of the old files
18			:param modified_results: List of results of the new files
19			:return: List of results from new files that are unique
20			from all those that existed in the old changes
21			"""
22
23			renamed_files = ensure_files_present(original_file_dict,
24			modified_file_dict)
25			# diffs_dict[file] is a diff between the original and modified file
26			diffs_dict = {}
27			for file in original_file_dict:
28			if file in renamed_files:
29			diffs_dict[file] = Diff.from_string_arrays(
30			original_file_dict[file],
31			modified_file_dict[renamed_files[file]])
32			else:
33			diffs_dict[file] = Diff.from_string_arrays(
34			original_file_dict[file],
35			modified_file_dict[file])
36
37			orig_result_diff_dict_dict = remove_result_ranges_diffs(original_results,
38			original_file_dict)
39
40			mod_result_diff_dict_dict = remove_result_ranges_diffs(modified_results,
41			modified_file_dict)
42
43			unique_results = []
44
45			for m_r in reversed(modified_results):
46			unique = True
47
48			for o_r in original_results:
49
50			if basics_match(o_r, m_r, renamed_files):
51			if source_ranges_match(original_file_dict,
52			diffs_dict,
53			orig_result_diff_dict_dict[o_r],
54			mod_result_diff_dict_dict[m_r],
55			renamed_files):
56
57			# at least one original result matches completely
58			unique = False
59			break
60			if unique:
61			unique_results.append(m_r)
62
63			return unique_results
64
65
66			def basics_match(original_result,
67			modified_result,
68			renamed_files):
69			"""
70			Checks whether the following properties of two results match:
71			* origin
72			* message
73			* severity
74			* debug_msg
75
76			:param original_result: A result of the old files
77			:param modified_result: A result of the new files
78			:param renamed_files: A dictionary containing file renamings across runs
79			:return: Boolean value whether or not the properties match
80			"""
81
82			return all(getattr(original_result, member) ==
83			getattr(modified_result, member)
84			for member in ['origin','message', 'severity', 'debug_msg'])
			0 ignored issues – show | Coding Style issue, introduced 2016-05-01 19:33 UTC | Report Bug · Copy Issue Report | Exactly one space required after comma: `for member in ['origin','message', 'severity', 'debug_msg'])` (the comma after 'origin' has no space) | Loading history...
85
86
87			def source_ranges_match(original_file_dict,
88			diff_dict,
89			original_result_diff_dict,
90			modified_result_diff_dict,
91			renamed_files):
92			"""
93			Checks whether the SourceRanges of two results match
94
95			:param original_file_dict: Dict of lists of file contents before changes
96			:param diff_dict: Dict of diffs describing the changes per file
97			:param original_result_diff_dict: diff for each file for this result
98			:param modified_result_diff_dict: guess
99			:param renamed_files: A dictionary containing file renamings across runs
100			:return: Boolean value whether the SourceRanges match
101			"""
102			for file_name in original_file_dict:
103			mod_file_name = file_name
104			if file_name in renamed_files:
105			mod_file_name = renamed_files[file_name]
106
107			try: # fails if the affected range of the result get's modified
108			original_total_diff = (diff_dict[file_name] +
109			original_result_diff_dict[file_name])
110			except ConflictError:
111			return False
112
113			# original file with file_diff and original_diff applied
114			original_total_file = original_total_diff.modified
115			# modified file with modified_diff applied
116			modified_total_file = modified_result_diff_dict[mod_file_name].modified
117			if original_total_file != modified_total_file:
118			return False
119			return True
120
121
122			def remove_range(file_contents, source_range):
123			"""
124			removes the chars covered by the sourceRange from the file
125
126			:param file_contents: list of lines in the file
127			:param source_range: Source Range
128			:return: list of file contents without specified chars removed
129			"""
130			if not file_contents:
131			return []
132
133			newfile = list(file_contents)
134			# attention: line numbers in the SourceRange are human-readable,
135			# list indices start with 0
136
137			source_range = source_range.expand(file_contents)
138
139			if source_range.start.line == source_range.end.line:
140			# if it's all in one line, replace the line by it's beginning and end
141			newfile[source_range.start.line - 1] = (
142			newfile[source_range.start.line - 1][:source_range.start.column-1]
143			+ newfile[source_range.start.line - 1][source_range.end.column:])
144			if newfile[source_range.start.line - 1] == "":
145			del newfile[source_range.start.line - 1]
146			else:
147			# cut away after start
148			newfile[source_range.start.line - 1] = (
149			newfile[source_range.start.line - 1][:source_range.start.column-1])
150
151			# cut away before end
152			newfile[source_range.end.line - 1] = (
153			newfile[source_range.end.line - 1][source_range.end.column:])
154
155			# start: index = first line number ==> line after first line
156			# end: index = last line -2 ==> line before last line
157
158			for i in reversed(range(
159			source_range.start.line, source_range.end.line - 1)):
160			del newfile[i]
161
162			# remove leftover empty lines
163			# the first line here is actually the former `source_range.end.line -1`
164			if newfile[source_range.start.line] == "":
165			del newfile[source_range.start.line]
166			if newfile[source_range.start.line - 1] == "":
167			del newfile[source_range.start.line - 1]
168
169			return newfile
170
171
172			def remove_result_ranges_diffs(result_list, file_dict):
173			"""
174			Calculates the diffs to all files in file_dict that describe the removal of
175			each respective result's affected code.
176
177			:param result_list: list of results
178			:param file_dict: dict of file contents
179			:return: returnvalue[result][file] is a diff of the changes the
180			removal of this result's affected code would cause for
181			the file.
182			"""
183			result_diff_dict_dict = {}
184			for original_result in result_list:
185			mod_file_dict = copy.deepcopy(file_dict)
186
187			# gather all source ranges from this result
188			source_ranges = []
189
190			# SourceRanges must be sorted backwards and overlaps must be eliminated
191			# this way, the deletion based on sourceRanges is not offset by
192			# previous deletions in the same line that invalidate the indices.
193			previous = None
194
195			for source_range in sorted(original_result.affected_code, reverse=True):
196			# previous exists and overlaps
197			if previous is not None and source_range.overlaps(previous):
198			combined_sr = SourceRange.join(previous, source_range)
199			previous = combined_sr
200			elif previous is None:
201			previous = source_range
202			# previous exists but it doesn't overlap
203			else:
204			source_ranges.append(previous)
205			previous = source_range
206			# don't forget last entry if there were any:
207			if previous:
208			source_ranges.append(previous)
209
210			for source_range in source_ranges:
211			file_name = source_range.file
212			new_file = remove_range(mod_file_dict[file_name],
213			source_range)
214			mod_file_dict[file_name] = new_file
215
216			diff_dict = {}
217			for file_name in file_dict:
218			diff_dict[file_name] = Diff.from_string_arrays(
219			file_dict[file_name],
220			mod_file_dict[file_name])
221
222			result_diff_dict_dict[original_result] = diff_dict
223
224			return result_diff_dict_dict
225
226
227			def ensure_files_present(original_file_dict, modified_file_dict):
228			"""
229			Ensures that all files are available as keys in both dicts. Return a
230			dictionary of renamed files.
231
232			:param original_file_dict: Dict of lists of file contents before changes
233			:param modified_file_dict: Dict of lists of file contents after changes
234			"""
235			affected_files = set(original_file_dict.keys()).union(
236			set(modified_file_dict.keys()))
237			original_unique_files = affected_files - set(modified_file_dict.keys())
238			renamed_files_dict = {}
239			for file in affected_files:
240			if file not in original_file_dict:
241			renamed = 0
242			for comparable_file in original_unique_files:
243			s = SequenceMatcher(
244			None,
245			''.join(modified_file_dict[file]),
246			''.join(original_file_dict[comparable_file]))
247			if s.quick_ratio() > 0.5:
248			if s.ratio() > 0.5:
249			renamed_files_dict[comparable_file] = file
250			renamed = 1
251			break
252			if renamed == 0:
253			original_file_dict[file] = []
254			if file not in modified_file_dict:
255			modified_file_dict[file] = []
256			return renamed_files_dict
257

coala-analyzer / coala

Pull Request — master (#2100)

ensure_files_present() D

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like