| 1 |  |  | import functools | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | from itertools import combinations | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from bears.c_languages.codeclone_detection.ClangCountVectorCreator import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |     ClangCountVectorCreator) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | from bears.c_languages.codeclone_detection.ClangCountingConditions import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |     condition_dict) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from bears.c_languages.codeclone_detection.CloneDetectionRoutines import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |     compare_functions, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |     get_count_matrices) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | from coalib.bears.GlobalBear import GlobalBear | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | from coalib.collecting.Collectors import collect_dirs | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | from coalib.misc.StringConverter import StringConverter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | from coalib.results.HiddenResult import HiddenResult | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | from coalib.settings.Setting import typed_ordered_dict, path_list | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | # counting_condition_dict is a function object generated by typed_dict. This | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | # function takes a setting and creates a dictionary out of it while it | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | # converts all keys to counting condition function objects (via the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | # condition_dict) and all values to floats while unset values default to 1. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | counting_condition_dict = typed_ordered_dict( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     lambda setting: condition_dict[str(setting).lower()], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     float, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  | default_cc_dict = counting_condition_dict(StringConverter( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  | used: 0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  | returned: 1.4, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  | is_condition: 0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  | in_condition: 1.4, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  | in_second_level_condition: 1.4, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  | in_third_level_condition: 1.0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  | is_assignee: 0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | is_assigner: 0.6, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  | loop_content: 0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  | second_level_loop_content, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  | third_level_loop_content, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  | is_param: 2, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  | is_called: 1.4, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  | is_call_param: 0.0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  | in_sum: 2.0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  | in_product: 0, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  | in_binary_operation, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  | member_accessed""")) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 47 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 48 |  |  | def get_difference(function_pair, | 
            
                                                                        
                            
            
                                    
            
            
                | 49 |  |  |                    count_matrices, | 
            
                                                                        
                            
            
                                    
            
            
                | 50 |  |  |                    average_calculation, | 
            
                                                                        
                            
            
                                    
            
            
                | 51 |  |  |                    poly_postprocessing, | 
            
                                                                        
                            
            
                                    
            
            
                | 52 |  |  |                    exp_postprocessing): | 
            
                                                                        
                            
            
                                    
            
            
                | 53 |  |  |     """ | 
            
                                                                        
                            
            
                                    
            
            
                | 54 |  |  |     Retrieves the difference between two functions using the munkres algorithm. | 
            
                                                                        
                            
            
                                    
            
            
                | 55 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 56 |  |  |     :param function_pair:       A tuple containing both indices for the | 
            
                                                                        
                            
            
                                    
            
            
                | 57 |  |  |                                 count_matrices dictionary. | 
            
                                                                        
                            
            
                                    
            
            
                | 58 |  |  |     :param count_matrices:      A dictionary holding CMs. | 
            
                                                                        
                            
            
                                    
            
            
                | 59 |  |  |     :param average_calculation: If set to true the difference calculation | 
            
                                                                        
                            
            
                                    
            
            
                | 60 |  |  |                                 function will take the average of all variable | 
            
                                                                        
                            
            
                                    
            
            
                | 61 |  |  |                                 differences as the difference, else it will | 
            
                                                                        
                            
            
                                    
            
            
                | 62 |  |  |                                 normalize the function as a whole and thus | 
            
                                                                        
                            
            
                                    
            
            
                | 63 |  |  |                                 weighting in variables dependent on their size. | 
            
                                                                        
                            
            
                                    
            
            
                | 64 |  |  |     :param poly_postprocessing: If set to true, the difference value of big | 
            
                                                                        
                            
            
                                    
            
            
                | 65 |  |  |                                 function pairs will be reduced using a | 
            
                                                                        
                            
            
                                    
            
            
                | 66 |  |  |                                 polynomial approach. | 
            
                                                                        
                            
            
                                    
            
            
                | 67 |  |  |     :param exp_postprocessing:  If set to true, the difference value of big | 
            
                                                                        
                            
            
                                    
            
            
                | 68 |  |  |                                 function pairs will be reduced using an | 
            
                                                                        
                            
            
                                    
            
            
                | 69 |  |  |                                 exponential approach. | 
            
                                                                        
                            
            
                                    
            
            
                | 70 |  |  |     :return:                    A tuple containing both function ids and their | 
            
                                                                        
                            
            
                                    
            
            
                | 71 |  |  |                                 difference. | 
            
                                                                        
                            
            
                                    
            
            
                | 72 |  |  |     """ | 
            
                                                                        
                            
            
                                    
            
            
                | 73 |  |  |     function_1, function_2 = function_pair | 
            
                                                                        
                            
            
                                    
            
            
                | 74 |  |  |     return (function_1, | 
            
                                                                        
                            
            
                                    
            
            
                | 75 |  |  |             function_2, | 
            
                                                                        
                            
            
                                    
            
            
                | 76 |  |  |             compare_functions(count_matrices[function_1], | 
            
                                                                        
                            
            
                                    
            
            
                | 77 |  |  |                               count_matrices[function_2], | 
            
                                                                        
                            
            
                                    
            
            
                | 78 |  |  |                               average_calculation, | 
            
                                                                        
                            
            
                                    
            
            
                | 79 |  |  |                               poly_postprocessing, | 
            
                                                                        
                            
            
                                    
            
            
                | 80 |  |  |                               exp_postprocessing)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  | class ClangFunctionDifferenceBear(GlobalBear): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |     def run(self, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |             counting_conditions: counting_condition_dict=default_cc_dict, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |             average_calculation: bool=False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |             poly_postprocessing: bool=True, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |             exp_postprocessing: bool=False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |             extra_include_paths: path_list=()): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |         ''' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         Retrieves similarities for code clone detection. Those can be reused in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         another bear to produce results. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         Postprocessing may be done because small functions are less likely to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         be clones at the same difference value than big functions which may | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         provide a better refactoring opportunity for the user. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         :param counting_conditions: A comma seperated list of counting | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |                                     conditions. Possible values are: used, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |                                     returned, is_condition, in_condition, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |                                     in_second_level_condition, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |                                     in_third_level_condition, is_assignee, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |                                     is_assigner, loop_content, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |                                     second_level_loop_content, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |                                     third_level_loop_content, is_param, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |                                     in_sum, in_product, in_binary_operation, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |                                     member_accessed. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |                                     Weightings can be assigned to each | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                                     condition due to providing a dict | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |                                     value, i.e. having used weighted in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |                                     half as much as other conditions would | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |                                     simply be: "used: 0.5, is_assignee". | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |                                     Weightings default to 1 if unset. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |         :param average_calculation: If set to true the difference calculation | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |                                     function will take the average of all | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |                                     variable differences as the difference, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |                                     else it will normalize the function as a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |                                     whole and thus weighting in variables | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |                                     dependent on their size. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |         :param poly_postprocessing: If set to true, the difference value of big | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |                                     function pairs will be reduced using a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |                                     polynomial approach. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |         :param extra_include_paths: A list containing additional include paths. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |         :param exp_postprocessing:  If set to true, the difference value of big | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |                                     function pairs will be reduced using an | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |                                     exponential approach. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |         ''' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |         self.debug("Using the following counting conditions:") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |         for key, val in counting_conditions.items(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |             self.debug(" *", key.__name__, "(weighting: {})".format(val)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |         self.debug("Creating count matrices...") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |         count_matrices = get_count_matrices( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |             ClangCountVectorCreator(list(counting_conditions.keys()), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |                                     list(counting_conditions.values())), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |             list(self.file_dict.keys()), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |             lambda prog: self.debug("{:2.4f}%...".format(prog)), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |             self.section["files"].origin, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |             collect_dirs(extra_include_paths)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |         self.debug("Calculating differences...") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |         differences = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |         function_count = len(count_matrices) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |         # That's n over 2, hardcoded to simplify calculation | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |         combination_length = function_count * (function_count-1) / 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |         partial_get_difference = functools.partial( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |             get_difference, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |             count_matrices=count_matrices, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |             average_calculation=average_calculation, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |             poly_postprocessing=poly_postprocessing, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |             exp_postprocessing=exp_postprocessing) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |         for i, elem in enumerate( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |                 map(partial_get_difference, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |                     [(f1, f2) for f1, f2 in combinations(count_matrices, 2)])): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |             if i % 50 == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |                 self.debug("{:2.4f}%...".format(100*i/combination_length)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |             differences.append(elem) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |         yield HiddenResult(self, differences) | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 162 |  |  |         yield HiddenResult(self, count_matrices) | 
            
                                                        
            
                                    
            
            
                | 163 |  |  |  |