bears.codeclone_detection.get_difference() - Code Metrics - Inspection of "Linters" - coala-analyzer/coala - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#1109)

by Abdeali

created 2015-12-07 13:21 UTC

bears.codeclone_detection.get_difference() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	1
dl	0
loc	33
rs	8.8571

import functools
from itertools import combinations

from coalib.misc.StringConverter import StringConverter
from coalib.results.HiddenResult import HiddenResult
from coalib.settings.Setting import typed_ordered_dict, path_list
from coalib.collecting.Collectors import collect_dirs
from coalib.bears.GlobalBear import GlobalBear
from bears.codeclone_detection.ClangCountVectorCreator import (
    ClangCountVectorCreator)
from bears.codeclone_detection.ClangCountingConditions import condition_dict
from bears.codeclone_detection.CloneDetectionRoutines import (
    compare_functions,
    get_count_matrices)


# counting_condition_dict is a function object generated by
# typed_ordered_dict. This function takes a setting and creates a dictionary
# out of it while it converts all keys to counting condition function objects
# (via the condition_dict; the key is lowercased before the lookup) and all
# values to floats while unset values default to 1.
counting_condition_dict = typed_ordered_dict(
    lambda setting: condition_dict[str(setting).lower()],
    float,
    1)

# Default counting conditions and their weightings, built by running the
# setting string below through counting_condition_dict. Entries written
# without an explicit ": weight" (e.g. second_level_loop_content) carry no
# value of their own and therefore fall back to the default weighting that
# was passed to typed_ordered_dict (1).
default_cc_dict = counting_condition_dict(StringConverter(
    """
used: 0,
returned: 1.4,
is_condition: 0,
in_condition: 1.4,
in_second_level_condition: 1.4,
in_third_level_condition: 1.0,
is_assignee: 0,
is_assigner: 0.6,
loop_content: 0,
second_level_loop_content,
third_level_loop_content,
is_param: 2,
is_called: 1.4,
is_call_param: 0.0,
in_sum: 2.0,
in_product: 0,
in_binary_operation,
member_accessed"""))


def get_difference(function_pair,
                   count_matrices,
                   average_calculation,
                   poly_postprocessing,
                   exp_postprocessing):
    """
    Computes the difference between the two functions of a pair.

    The count matrices of both functions are looked up and handed, together
    with the three calculation flags, to compare_functions, which performs
    the actual comparison.

    :param function_pair:       A tuple holding the two keys under which the
                                functions are stored in count_matrices.
    :param count_matrices:      A dictionary mapping function keys to their
                                count matrices (CMs).
    :param average_calculation: If set to true the difference is the average
                                of all variable differences; otherwise the
                                function is normalized as a whole, so that
                                variables are weighted depending on their
                                size.
    :param poly_postprocessing: If set to true, the difference value of big
                                function pairs will be reduced using a
                                polynomial approach.
    :param exp_postprocessing:  If set to true, the difference value of big
                                function pairs will be reduced using an
                                exponential approach.
    :return:                    A 3-tuple: first function key, second
                                function key and their difference.
    """
    first_id, second_id = function_pair
    difference = compare_functions(count_matrices[first_id],
                                   count_matrices[second_id],
                                   average_calculation,
                                   poly_postprocessing,
                                   exp_postprocessing)
    return first_id, second_id, difference


class ClangFunctionDifferenceBear(GlobalBear):
    # Global bear that computes pairwise function differences over all files
    # of the section and hands them on as hidden results for other bears.

    def run(self,
            counting_conditions: counting_condition_dict=default_cc_dict,
            average_calculation: bool=False,
            poly_postprocessing: bool=True,
            exp_postprocessing: bool=False,
            extra_include_paths: path_list=()):
        '''
        Retrieves similarities for code clone detection. Those can be reused in
        another bear to produce results.

        Postprocessing may be done because small functions are less likely to
        be clones at the same difference value than big functions which may
        provide a better refactoring opportunity for the user.

        Two hidden results are yielded: first the list of
        (function_1, function_2, difference) tuples for every pair of
        functions, then the count matrices they were computed from.

        :param counting_conditions: A comma separated list of counting
                                    conditions. Possible values are: used,
                                    returned, is_condition, in_condition,
                                    in_second_level_condition,
                                    in_third_level_condition, is_assignee,
                                    is_assigner, loop_content,
                                    second_level_loop_content,
                                    third_level_loop_content, is_param,
                                    is_called, is_call_param, in_sum,
                                    in_product, in_binary_operation,
                                    member_accessed.
                                    Weightings can be assigned to each
                                    condition due to providing a dict
                                    value, i.e. having used weighted in
                                    half as much as other conditions would
                                    simply be: "used: 0.5, is_assignee".
                                    Weightings default to 1 if unset.
        :param average_calculation: If set to true the difference calculation
                                    function will take the average of all
                                    variable differences as the difference,
                                    else it will normalize the function as a
                                    whole and thus weighting in variables
                                    dependent on their size.
        :param poly_postprocessing: If set to true, the difference value of big
                                    function pairs will be reduced using a
                                    polynomial approach.
        :param exp_postprocessing:  If set to true, the difference value of big
                                    function pairs will be reduced using an
                                    exponential approach.
        :param extra_include_paths: A list containing additional include paths.
        '''
        self.debug("Using the following counting conditions:")
        for key, val in counting_conditions.items():
            self.debug(" *", key.__name__, "(weighting: {})".format(val))

        self.debug("Creating count matrices...")
        # The vector creator receives the condition functions and their
        # weightings as two parallel lists; progress is reported through the
        # debug log.
        count_matrices = get_count_matrices(
            ClangCountVectorCreator(list(counting_conditions.keys()),
                                    list(counting_conditions.values())),
            list(self.file_dict.keys()),
            lambda prog: self.debug("{:2.4f}%...".format(prog)),
            self.section["files"].origin,
            collect_dirs(extra_include_paths))

        self.debug("Calculating differences...")

        differences = []
        function_count = len(count_matrices)
        # That's n over 2, hardcoded to simplify calculation. It is 0 for
        # fewer than two functions, but then the loop below never runs, so
        # the division by it is never reached.
        combination_length = function_count * (function_count-1) / 2
        partial_get_difference = functools.partial(
            get_difference,
            count_matrices=count_matrices,
            average_calculation=average_calculation,
            poly_postprocessing=poly_postprocessing,
            exp_postprocessing=exp_postprocessing)

        # Feed the pair iterator straight into map instead of materializing
        # all n*(n-1)/2 pairs in a list first; the pairs and their order are
        # identical.
        pair_differences = map(partial_get_difference,
                               combinations(count_matrices, 2))
        for i, elem in enumerate(pair_differences):
            # Report progress only every 50th pair to keep the log readable.
            if i % 50 == 0:
                self.debug("{:2.4f}%...".format(100*i/combination_length))
            differences.append(elem)

        yield HiddenResult(self, differences)
        yield HiddenResult(self, count_matrices)


1			import functools
2			from itertools import combinations
3
4			from coalib.misc.StringConverter import StringConverter
5			from coalib.results.HiddenResult import HiddenResult
6			from coalib.settings.Setting import typed_ordered_dict, path_list
7			from coalib.collecting.Collectors import collect_dirs
8			from coalib.bears.GlobalBear import GlobalBear
9			from bears.codeclone_detection.ClangCountVectorCreator import (
10			ClangCountVectorCreator)
11			from bears.codeclone_detection.ClangCountingConditions import condition_dict
12			from bears.codeclone_detection.CloneDetectionRoutines import (
13			compare_functions,
14			get_count_matrices)
15
16
17			# counting_condition_dict is a function object generated by typed_ordered_dict. This
18			# function takes a setting and creates a dictionary out of it while it
19			# converts all keys to counting condition function objects (via the
20			# condition_dict) and all values to floats while unset values default to 1.
21			counting_condition_dict = typed_ordered_dict(
22			lambda setting: condition_dict[str(setting).lower()],
23			float,
24			1)
25
26			default_cc_dict = counting_condition_dict(StringConverter(
27			"""
28			used: 0,
29			returned: 1.4,
30			is_condition: 0,
31			in_condition: 1.4,
32			in_second_level_condition: 1.4,
33			in_third_level_condition: 1.0,
34			is_assignee: 0,
35			is_assigner: 0.6,
36			loop_content: 0,
37			second_level_loop_content,
38			third_level_loop_content,
39			is_param: 2,
40			is_called: 1.4,
41			is_call_param: 0.0,
42			in_sum: 2.0,
43			in_product: 0,
44			in_binary_operation,
45			member_accessed"""))
46
47
48			def get_difference(function_pair,
49			count_matrices,
50			average_calculation,
51			poly_postprocessing,
52			exp_postprocessing):
53			"""
54			Retrieves the difference between two functions using the munkres algorithm.
55
56			:param function_pair: A tuple containing both indices for the
57			count_matrices dictionary.
58			:param count_matrices: A dictionary holding CMs.
59			:param average_calculation: If set to true the difference calculation
60			function will take the average of all variable
61			differences as the difference, else it will
62			normalize the function as a whole and thus
63			weighting in variables dependent on their size.
64			:param poly_postprocessing: If set to true, the difference value of big
65			function pairs will be reduced using a
66			polynomial approach.
67			:param exp_postprocessing: If set to true, the difference value of big
68			function pairs will be reduced using an
69			exponential approach.
70			:return: A tuple containing both function ids and their
71			difference.
72			"""
73			function_1, function_2 = function_pair
74			return (function_1,
75			function_2,
76			compare_functions(count_matrices[function_1],
77			count_matrices[function_2],
78			average_calculation,
79			poly_postprocessing,
80			exp_postprocessing))
81
82
83			class ClangFunctionDifferenceBear(GlobalBear):
84			def run(self,
85			counting_conditions: counting_condition_dict=default_cc_dict,
86			average_calculation: bool=False,
87			poly_postprocessing: bool=True,
88			exp_postprocessing: bool=False,
89			extra_include_paths: path_list=()):
90			'''
91			Retrieves similarities for code clone detection. Those can be reused in
92			another bear to produce results.
93
94			Postprocessing may be done because small functions are less likely to
95			be clones at the same difference value than big functions which may
96			provide a better refactoring opportunity for the user.
97
98			:param counting_conditions: A comma separated list of counting
99			conditions. Possible values are: used,
100			returned, is_condition, in_condition,
101			in_second_level_condition,
102			in_third_level_condition, is_assignee,
103			is_assigner, loop_content,
104			second_level_loop_content,
105			third_level_loop_content, is_param,
106			in_sum, in_product, in_binary_operation,
107			member_accessed.
108			Weightings can be assigned to each
109			condition due to providing a dict
110			value, i.e. having used weighted in
111			half as much as other conditions would
112			simply be: "used: 0.5, is_assignee".
113			Weightings default to 1 if unset.
114			:param average_calculation: If set to true the difference calculation
115			function will take the average of all
116			variable differences as the difference,
117			else it will normalize the function as a
118			whole and thus weighting in variables
119			dependent on their size.
120			:param poly_postprocessing: If set to true, the difference value of big
121			function pairs will be reduced using a
122			polynomial approach.
123			:param extra_include_paths: A list containing additional include paths.
124			:param exp_postprocessing: If set to true, the difference value of big
125			function pairs will be reduced using an
126			exponential approach.
127			'''
128			self.debug("Using the following counting conditions:")
129			for key, val in counting_conditions.items():
130			self.debug(" *", key.__name__, "(weighting: {})".format(val))
131
132			self.debug("Creating count matrices...")
133			count_matrices = get_count_matrices(
134			ClangCountVectorCreator(list(counting_conditions.keys()),
135			list(counting_conditions.values())),
136			list(self.file_dict.keys()),
137			lambda prog: self.debug("{:2.4f}%...".format(prog)),
138			self.section["files"].origin,
139			collect_dirs(extra_include_paths))
140
141			self.debug("Calculating differences...")
142
143			differences = []
144			function_count = len(count_matrices)
145			# That's n over 2, hardcoded to simplify calculation
146			combination_length = function_count * (function_count-1) / 2
147			partial_get_difference = functools.partial(
148			get_difference,
149			count_matrices=count_matrices,
150			average_calculation=average_calculation,
151			poly_postprocessing=poly_postprocessing,
152			exp_postprocessing=exp_postprocessing)
153
154			for i, elem in enumerate(
155			map(partial_get_difference,
156			[(f1, f2) for f1, f2 in combinations(count_matrices, 2)])):
157			if i % 50 == 0:
158			self.debug("{:2.4f}%...".format(100*i/combination_length))
159			differences.append(elem)
160
161			yield HiddenResult(self, differences)
162			yield HiddenResult(self, count_matrices)
163

coala-analyzer / coala

Pull Request — master (#1109)

bears.codeclone_detection.get_difference() B

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like