|
1
|
|
|
from math import sqrt |
|
2
|
|
|
|
|
3
|
|
|
from coalib.misc.Decorators import generate_repr |
|
4
|
|
|
|
|
5
|
|
|
|
|
6
|
|
|
@generate_repr()
class CountVector:
    """
    Counts how often a variable is referenced under a set of conditions.

    Each condition is a callable; every time ``count_reference`` is invoked
    the entry belonging to each condition that returns a truthy value is
    increased. Both a weighted (``count_vector``) and an unweighted
    (``unweighted``) tally are kept, index-aligned with ``conditions``.
    """

    def __init__(self, name, conditions=None, weightings=None):
        """
        Creates a new count vector with all counts set to zero.

        :param name:       The name of the variable in the original code.
        :param conditions: The counting conditions as list of function
                           objects, each shall return true when getting data
                           indicating that this occurrence should be
                           counted. Defaults to an empty list.
        :param weightings: Optional factors to weight counting conditions,
                           one per condition. Defaults to 1 for all
                           conditions.
        :raises AssertionError: If the number of weightings differs from the
                                number of conditions.
        """
        self.name = name
        self.conditions = conditions if conditions is not None else []
        self.count_vector = [0 for _ in self.conditions]
        self.unweighted = [0 for _ in self.conditions]
        self.weightings = weightings
        if self.weightings is None:
            self.weightings = [1 for _ in self.conditions]

        # Compare by value: identity (`is`) on ints only happens to work for
        # small cached integers and would fail for long vectors.
        assert len(self.count_vector) == len(self.weightings)

    def create_null_vector(self, name):
        """
        Creates a new CountVector object with the same counting conditions
        and weightings as this one, but with all counts at zero.

        The conditions and weightings objects are passed on as they are, not
        copied.

        :param name: The name to give the new vector.
        :return:     A CountVector object.
        """
        return CountVector(name, self.conditions, self.weightings)

    def count_reference(self, *args, **kwargs):
        """
        Counts the reference to the variable under the conditions held in
        this object.

        For every condition that evaluates truthy, the matching weighting is
        added to ``count_vector`` and 1 is added to ``unweighted``.

        Any arguments or kwarguments will be passed to all conditions.
        """
        for i, condition in enumerate(self.conditions):
            if condition(*args, **kwargs):
                self.count_vector[i] += self.weightings[i]
                self.unweighted[i] += 1

    def __str__(self):
        """
        :return: The string representation of the weighted counts list.
        """
        return str(self.count_vector)

    def __len__(self):
        """
        :return: The number of entries in the count vector.
        """
        return len(self.count_vector)

    def __iter__(self):
        """
        :return: An iterator over the weighted counts.
        """
        return iter(self.count_vector)

    def __abs__(self):
        """
        :return: The euclidean length of the weighted count vector as float.
        """
        return sqrt(sum(x**2 for x in self))

    def maxabs(self, other):
        """
        Calculates the absolute value of a vector that has the maximum
        entries row-wise of both given vectors. This can be used as
        normalization: as long as all entries of both vectors are
        non-negative, this value is bigger than or equal to the difference
        value of those two vectors.

        Entries are paired with ``zip``, so surplus entries of the longer
        vector are ignored if the lengths differ.

        :param other: The vector to normalize with.
        :return:      A float value bigger or equal than the difference
                      between self and other (for non-negative entries).
        """
        return sqrt(sum(max(x, y)**2 for x, y in zip(self, other)))

    def difference(self, other):
        """
        Calculates an absolute difference value (the euclidean distance
        between the two weighted count vectors). 0 means no difference,
        i.e. the count vectors are identical.

        :param other: The CountVector to calculate the difference to. Must
                      have the same length as this vector.
        :return:      An absolute difference value.
        :raises AssertionError: If other is no CountVector or has a
                                different length.
        """
        assert isinstance(other, CountVector)
        assert len(other) == len(self)

        return sqrt(sum((x-y)**2 for x, y in zip(self, other)))
|
87
|
|
|
|