Completed
Pull Request — master (#1116)
by Lasse
01:41
created

bears.c_languages.codeclone_detection.CountVector   A

Complexity

Total Complexity 22

Size/Duplication

Total Lines 81
Duplicated Lines 0 %
Metric Value
dl 0
loc 81
rs 10
wmc 22

9 Methods

Rating   Name   Duplication   Size   Complexity  
B __init__() 0 20 7
A __str__() 0 2 1
A count_reference() 0 11 3
A __len__() 0 2 1
A difference() 0 12 4
A create_null_vector() 0 8 1
A maxabs() 0 12 2
A __abs__() 0 2 2
A __iter__() 0 2 1
1
from math import sqrt
2
3
from coalib.misc.Decorators import generate_repr
4
5
6
@generate_repr()
class CountVector:
    """
    Counts how often a variable is referenced under a set of counting
    conditions, optionally weighting each condition.

    The object behaves like a vector of the weighted counts: it supports
    ``len()``, iteration, ``str()`` and ``abs()`` (euclidean norm).
    """

    def __init__(self, name, conditions=None, weightings=None):
        """
        Creates a new count vector.

        :param name:       The name of the variable in the original code.
        :param conditions: The counting conditions as list of function objects,
                           each shall return true when getting data indicating
                           that this occurrence should be counted.
        :param weightings: Optional factors to weight counting conditions.
                           Defaults to 1 for all conditions.
        :raises AssertionError: If the number of weightings does not match
                                the number of conditions.
        """
        self.name = name
        self.conditions = conditions if conditions is not None else []
        # Weighted counts, one entry per condition.
        self.count_vector = [0 for _ in self.conditions]
        # Raw (unweighted) number of hits, one entry per condition.
        self.unweighted = [0 for _ in self.conditions]
        self.weightings = weightings
        if self.weightings is None:
            self.weightings = [1 for _ in self.conditions]

        # Compare by value, not identity: `is` on integers only happens to
        # work for small cached ints and breaks for larger vectors.
        assert len(self.count_vector) == len(self.weightings)

    def create_null_vector(self, name):
        """
        Creates a new CountVector object with the same counting conditions
        and weightings but initializes it to zero.

        The conditions and weightings objects are passed on as they are, so
        the new vector shares them with this one.

        :param name: The name of the variable for the new vector.
        :return:     A CountVector object.
        """
        return CountVector(name, self.conditions, self.weightings)

    def count_reference(self, *args, **kwargs):
        """
        Counts the reference to the variable under the conditions held in this
        object.

        Any arguments or kwarguments will be passed to all conditions. For
        every condition that evaluates to a true value, the weighted count is
        increased by the condition's weighting and the unweighted count by 1.
        """
        for i, condition in enumerate(self.conditions):
            if condition(*args, **kwargs):
                self.count_vector[i] += self.weightings[i]
                self.unweighted[i] += 1

    def __str__(self):
        """
        Returns the string representation of the weighted count list.
        """
        return str(self.count_vector)

    def __len__(self):
        """
        Returns the number of entries (i.e. conditions) of this vector.
        """
        return len(self.count_vector)

    def __iter__(self):
        """
        Iterates over the weighted counts.
        """
        return iter(self.count_vector)

    def __abs__(self):
        """
        Returns the euclidean norm of the weighted counts.
        """
        return sqrt(sum(x**2 for x in self))

    def maxabs(self, other):
        """
        Calculates the absolute value of a vector that has the maximum
        entries row-wise of both given vectors. This can be used as
        normalization since, for non-negative entries, this value is
        guaranteed to be bigger or equal the difference value of those two
        vectors.

        Entries are paired with ``zip``, so if the lengths differ the surplus
        entries of the longer vector are ignored.

        :param other: The vector to normalize with.
        :return:      A float value bigger or equal than the difference
                      between self and other.
        """
        return sqrt(sum(max(x, y)**2 for x, y in zip(self, other)))

    def difference(self, other):
        """
        Calculates an absolute difference value. 0 means no difference,
        i.e. the count vectors are identical.

        :param other: The CountVector to calculate the difference to.
        :return:      An absolute difference value.
        :raises AssertionError: If other is no CountVector or has a different
                                length than this vector.
        """
        assert isinstance(other, CountVector)
        assert len(other) == len(self)

        return sqrt(sum((x-y)**2 for x, y in zip(self, other)))
87