Passed
Push — master ( 812b05...176334 )
by Konstantinos
02:19 queued 01:50
created

so_magic.data.features.features   A

Complexity

Total Complexity 34

Size/Duplication

Total Lines 183
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 106
dl 0
loc 183
rs 9.68
c 0
b 0
f 0
wmc 34

23 Methods

Rating   Name   Duplication   Size   Complexity  
A BaseAttributeReporter.values() 0 2 1
A BaseAttributeReporter.value_set() 0 2 1
A BaseAttributeReporter.variable_type() 0 2 1
A TrackingFeature.from_callable() 0 6 1
A AttributeReporterInterface.variable_type() 0 12 1
A AttributeReporter.__str__() 0 2 1
A FeatureState.__str__() 0 2 1
A TrackingFeature.label() 0 2 1
A TrackingFeature.values() 0 2 1
A PhiFeatureFunction.__call__() 0 2 1
A TrackingFeature.state() 0 4 1
A AttributeReporter.values() 0 3 1
A StateMachine.update() 0 9 4
A TrackingFeature.update() 0 2 1
A AttributeReporter.variable_type() 0 3 1
A FeatureFunction.is_label() 0 4 2
A StateMachine.current() 0 3 1
A StateMachine.state() 0 4 1
A FeatureFunction.values() 0 2 1
A AttributeReporterInterface.value_set() 0 3 1
A AttributeReporterInterface.values() 0 12 1
A FeatureFunction.state() 0 3 1
A AttributeReporter.value_set() 0 2 1

3 Functions

Rating   Name   Duplication   Size   Complexity  
A _list_validator() 0 3 2
A _string_validator() 0 3 2
A is_callable() 0 5 3
1
from abc import ABC, abstractmethod
2
import attr
3
from so_magic.data.variables.types import VariableTypeFactory
4
5
6
class AttributeReporterInterface(ABC):
    """Contract for objects able to report on one attribute/variable of structured data (observations).

    A concrete reporter receives the datapoints together with the attribute of interest
    and has to provide every operation declared below.
    """

    @abstractmethod
    def values(self, datapoints, attribute, **kwargs):
        """Report the values ([N x 1] vector) that all datapoints (N x D) hold for the given attribute.

        Args:
            datapoints (Datapoints): the observations to read from
            attribute (str): the variable/attribute whose values are requested

        Return:
            (numpy.ndarray): the values in a [N x 1] vector
        """
        raise NotImplementedError()

    @abstractmethod
    def variable_type(self, datapoints, attribute, **kwargs):
        """Report the variable type of the datapoints, given the attribute.

        Args:
            datapoints (Datapoints): the observations to inspect
            attribute (str): the variable/attribute whose type is requested

        Return:
            (str): the variable type
        """
        raise NotImplementedError()

    @abstractmethod
    def value_set(self, datapoints, attribute, **kwargs):
        """Report the collection of values observed for the given attribute.

        Args:
            datapoints (Datapoints): the observations to read from
            attribute (str): the variable/attribute of interest
        """
        raise NotImplementedError()
39
40
41
class BaseAttributeReporter(AttributeReporterInterface):
    """Default reporter: delegates to the datapoints object itself and to VariableTypeFactory."""

    def values(self, datapoints, attribute, **kwargs):
        """Return whatever the datapoints yield when indexed by the attribute."""
        column = datapoints[attribute]
        return column

    def variable_type(self, datapoints, attribute, **kwargs):
        """Return the result of VariableTypeFactory.infer; extra keyword arguments are forwarded to it."""
        inferred_type = VariableTypeFactory.infer(datapoints, attribute, **kwargs)
        return inferred_type

    def value_set(self, datapoints, attribute, **kwargs):
        """Return the distinct entries of datapoints.column(attribute) as a set."""
        return {*datapoints.column(attribute)}
51
52
53
#### HELPERS
54
def _list_validator(_self, _attribute, value):
55
    if not isinstance(value, list):
56
        raise ValueError(f'Expected a list; instead a {type(value).__name__} was given.')
57
58
59
def _string_validator(_self, _attribute, value):
60
    if not isinstance(value, str):
61
        raise ValueError(f'Expected a string; instead a {type(value).__name__} was given.')
62
63
64
@attr.s
class AttributeReporter:
    """Binds an attribute label to a reporter, so that callers only have to supply the datapoints."""
    label = attr.ib()
    # NOTE: the default reporter is instantiated once, when this class body is evaluated, and is
    # therefore shared by every AttributeReporter that is not given its own reporter
    reporter = attr.ib(default=BaseAttributeReporter())

    def values(self, datapoints):
        """Ask the reporter for the values of the datapoints under this object's label."""
        reported_values = self.reporter.values(datapoints, self.label)
        return reported_values

    def variable_type(self, datapoints):
        """Ask the reporter for the variable type of the datapoints under this object's label."""
        reported_type = self.reporter.variable_type(datapoints, self.label)
        return reported_type

    def value_set(self, datapoints):
        """Ask the reporter for the value set of the datapoints under this object's label."""
        reported_set = self.reporter.value_set(datapoints, self.label)
        return reported_set

    def __str__(self):
        """Use the label as the string representation."""
        return self.label
82
83
@attr.s
class FeatureState:
    """Pairs a state key with the reporter that belongs to that state."""
    key = attr.ib()
    reporter = attr.ib()

    def __str__(self):
        """Use the key as the string representation."""
        return self.key
90
91
92
def is_callable(_self, _attribute, value):
    """Validate that value is a callable able to receive at least one positional argument.

    Follows the validator signature (instance, attribute, value) used with attr.ib.

    Args:
        _self: the instance being validated (unused)
        _attribute: the attribute being validated (unused)
        value: the object to check

    Raises:
        ValueError: if value is not callable, or if its signature can be inspected and
            shows that it cannot receive any positional argument
    """
    import inspect  # local import, so the block does not depend on a file-level import

    if not callable(value):
        raise ValueError(f"Expected a callable object; instead {type(value)} was given.")

    # The previous check read value.func_code, which only exists on Python 2 functions; on
    # Python 3 it raised AttributeError for every function. inspect.signature also copes with
    # bound methods (self is excluded), classes and callable objects.
    try:
        parameters = inspect.signature(value).parameters.values()
    except (TypeError, ValueError):
        # no introspectable signature (e.g. some builtins): the arity cannot be verified
        return

    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
    )
    if not any(parameter.kind in positional_kinds for parameter in parameters):
        raise ValueError("Expected a callable that takes at least 1 argument; "
                         "instead a callable that takes no arguments was given.")
98
99
@attr.s
class FeatureFunction:
    """Wraps a callable that is applied to a dataset to produce feature values.

    Example: Assume we have a datapoint v = [v_1, v_2, .., v_n] and 2 feature functions f_1, f_2.
    Then we can produce an encoded vector (eg to feed for training a ML model) like:
    encoded_vector = [f_1(v), f_2(v)]
    """
    function = attr.ib(init=True, validator=is_callable)

    label = attr.ib(init=True, default=None)

    @label.validator
    def is_label(self, _attribute, value):
        """Fall back to the name of the wrapped callable when no label was supplied."""
        if value is None:
            # Python 3 exposes the name as __name__ (func_name existed on Python 2 only and
            # raised AttributeError here). Callables without a __name__, eg functools.partial
            # objects or instances defining __call__, fall back to the name of their type.
            self.label = getattr(self.function, '__name__', type(self.function).__name__)

    def values(self, dataset):
        """Apply the wrapped callable to the dataset and return its result."""
        return self.function(dataset)

    @property
    def state(self):
        """Construct a FeatureState holding the label as key and the wrapped callable as reporter."""
        return FeatureState(self.label, self.function)
118
119
120
@attr.s
class StateMachine:
    """Holds a container of states, indexed by key, and remembers which key is the current one."""
    states = attr.ib()
    init_state = attr.ib()
    # not an __init__ argument; it starts out equal to init_state
    _current = attr.ib(init=False, default=attr.Factory(lambda self: self.init_state, takes_self=True))

    @property
    def current(self):
        """The key of the current state."""
        return self._current

    def update(self, *args, **kwargs):
        """Change the current state, optionally registering a new state first.

        With two or more positional arguments the second one is stored under the first one
        (the key), which then becomes the current state. With exactly one positional argument
        the current state switches to that key, provided it is already among the states.
        Without positional arguments nothing happens. Keyword arguments are not used.

        Raises:
            RuntimeError: if a single key is given and it is not among the existing states
        """
        if not args:
            return

        requested = args[0]
        if len(args) > 1:
            self.states[requested] = args[1]
            self._current = requested
            return

        if requested not in self.states:
            raise RuntimeError(f"Requested to set the current state to '{args[0]}', "
                               f"it is not in existing [{', '.join(sorted(self.states))}]")
        self._current = requested

    @property
    def state(self):
        """Build a FeatureState out of the current key and the object stored under it."""
        active_key = self._current
        return FeatureState(active_key, self.states[active_key])
145
146
147
@attr.s
class TrackingFeature:
    """A feature whose values are produced by the reporter of its state machine's current state."""
    feature = attr.ib(init=True)
    state_machine = attr.ib(init=True)
    variable_type = attr.ib(init=True, default=None)

    @classmethod
    def from_callable(cls, a_callable, label=None, variable_type=None):
        """Construct a feature that has one extract/report capability.

        The callable is wrapped in a FeatureFunction and is also registered as the single
        'raw' state of a new StateMachine, 'raw' being its initial state.

        Args:
            a_callable (callable): the function that extracts the feature values from a dataset
            label: forwarded to FeatureFunction as its label (None lets it choose its default)
            variable_type: stored as-is on the new instance

        Return:
            an instance of the class this method is invoked on
        """
        # build through cls rather than the hard-coded base class, so that calling this
        # alternate constructor on a subclass yields an instance of that subclass
        return cls(FeatureFunction(a_callable, label), StateMachine({'raw': a_callable}, 'raw'),
                   variable_type)

    def values(self, dataset):
        """Call the reporter of the current state on the dataset and return its result."""
        return self.state_machine.state.reporter(dataset)

    def label(self):
        """Return the label of the wrapped feature."""
        return self.feature.label

    @property
    def state(self):
        """Returns the current state"""
        return self.state_machine.state

    def update(self, *args, **kwargs):
        """Forward all arguments to the update method of the state machine."""
        self.state_machine.update(*args, **kwargs)
173
174
175
@attr.s
class FeatureIndex:
    """Holds a list of keys; any value that is not a list is rejected by the validator."""
    keys = attr.ib(validator=_list_validator)
178
179
180
class PhiFeatureFunction:
    """Callable placeholder: invoking an instance always raises NotImplementedError."""

    def __call__(self, *args, **kwargs):
        """Accept any arguments and raise NotImplementedError."""
        raise NotImplementedError()
183