FeatureFunction.values()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
from abc import ABC, abstractmethod
2
import attr
3
from so_magic.data.variables.types import VariableTypeFactory
4
5
6
class AttributeReporterInterface(ABC):
    """Contract for objects that can report information on one attribute/variable
    of some structured data (observations).

    Every method receives the datapoints and the attribute to report on; concrete
    subclasses must override all three methods.
    """

    @abstractmethod
    def values(self, datapoints, attribute, **kwargs):
        """Get the values ([N x 1] vector) of all datapoints (N x D) for the given variable/attribute.

        Args:
            datapoints (Datapoints): the observations to read the values from
            attribute (str): the variable/attribute whose values are requested

        Return:
            (numpy.ndarray): the values in a [N x 1] vector
        """
        raise NotImplementedError()

    @abstractmethod
    def variable_type(self, datapoints, attribute, **kwargs):
        """Get the variable type of the datapoints, given the attribute.

        Args:
            datapoints (Datapoints): the observations to inspect
            attribute (str): the variable/attribute whose type is requested

        Return:
            (str): the variable type
        """
        raise NotImplementedError()

    @abstractmethod
    def value_set(self, datapoints, attribute, **kwargs):
        """Report the set of values of the datapoints for the given attribute.

        Args:
            datapoints (Datapoints): the observations to inspect
            attribute (str): the variable/attribute whose values are requested
        """
        raise NotImplementedError()
39
40
41
class BaseAttributeReporter(AttributeReporterInterface):
    """Default reporter: reads straight from the datapoints object and delegates
    type inference to VariableTypeFactory.
    """

    def values(self, datapoints, attribute, **kwargs):
        """Return whatever the datapoints object yields when indexed by the attribute."""
        attribute_values = datapoints[attribute]
        return attribute_values

    def variable_type(self, datapoints, attribute, **kwargs):
        """Return the variable type inferred by VariableTypeFactory; extra kwargs are forwarded."""
        inferred_type = VariableTypeFactory.infer(datapoints, attribute, **kwargs)
        return inferred_type

    def value_set(self, datapoints, attribute, **kwargs):
        """Return the distinct values found in the attribute's column, as a set."""
        column = datapoints.column(attribute)
        return set(column)
51
52
53
#### HELPERS
54
def _list_validator(_self, _attribute, value):
55
    if not isinstance(value, list):
56
        raise ValueError(f'Expected a list; instead a {type(value).__name__} was given.')
57
58
59
def _string_validator(_self, _attribute, value):
60
    if not isinstance(value, str):
61
        raise ValueError(f'Expected a string; instead a {type(value).__name__} was given.')
62
63
64
@attr.s
class AttributeReporter:
    """Reports on one labelled attribute by delegating to a reporter object.

    The label is bound at construction, so callers only supply the datapoints.
    """
    # the attribute name handed to the reporter on every call
    label = attr.ib()
    # object doing the actual reporting; a single BaseAttributeReporter instance is the shared default
    reporter = attr.ib(default=BaseAttributeReporter())

    def values(self, datapoints):
        """Return the reporter's values for this label."""
        report = self.reporter
        return report.values(datapoints, self.label)

    def variable_type(self, datapoints):
        """Return the reporter's variable type for this label."""
        report = self.reporter
        return report.variable_type(datapoints, self.label)

    def value_set(self, datapoints):
        """Return the reporter's value set for this label."""
        report = self.reporter
        return report.value_set(datapoints, self.label)

    def __str__(self):
        return self.label
82
83
84
@attr.s
class FeatureState:
    """Pairs a state key with the reporter associated with that state."""
    # identifier of the state; also used as the string representation
    key = attr.ib()
    # the object associated with the state
    reporter = attr.ib()

    def __str__(self):
        return self.key
91
92
93
def is_callable(_self, _attribute, value):
    """Validate that `value` is a callable accepting at least one positional argument.

    Follows the (instance, attribute, value) validator signature; only `value` is inspected.

    Args:
        _self: the instance being validated (unused)
        _attribute: the attribute being validated (unused)
        value: the object to check

    Raises:
        ValueError: if `value` is not callable, or if its signature shows it cannot
            receive any positional argument
    """
    # Imported locally so this helper does not rely on the module's import block.
    import inspect

    if not callable(value):
        raise ValueError(f"Expected a callable object; instead {type(value)} was given.")

    # `func_code` only exists on Python 2 functions. inspect.signature works on Python 3
    # and also covers bound methods, classes, partials and callable instances.
    try:
        parameters = inspect.signature(value).parameters.values()
    except (TypeError, ValueError):
        # No introspectable signature (some builtin/extension callables): the arity
        # cannot be verified, so accept the callable rather than reject it on a guess.
        return

    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
    )
    if not any(parameter.kind in positional_kinds for parameter in parameters):
        raise ValueError("Expected a callable that takes at least 1 argument; "
                         "instead a callable that takes no arguments was given.")
99
100
@attr.s
class FeatureFunction:
    """Wraps a callable that computes a feature from a dataset, together with a label.

    Example: assume we have a datapoint v = [v_1, v_2, ..., v_n] and 2 feature functions f_1, f_2.
    Then we can produce an encoded vector (eg to feed for training a ML model) like:
    encoded_vector = [f_1(v), f_2(v)]
    """
    # the wrapped callable; validated by is_callable
    function = attr.ib(init=True, validator=is_callable)

    # optional label; when omitted (None) it is derived from the wrapped callable
    label = attr.ib(init=True, default=None)

    @label.validator
    def is_label(self, _attribute, value):
        """Fill in a missing label with the name of the wrapped callable."""
        if value is None:
            # `func_name` only exists on Python 2 functions; `__name__` is the Python 3 spelling.
            # Callables without a `__name__` (eg functools.partial objects or callable
            # instances) fall back to the name of their class.
            self.label = getattr(self.function, '__name__', type(self.function).__name__)

    def values(self, dataset):
        """Apply the wrapped callable to the dataset and return its result."""
        return self.function(dataset)

    @property
    def state(self):
        """A FeatureState built from this feature's label and wrapped callable."""
        return FeatureState(self.label, self.function)
119
120
121
@attr.s
class StateMachine:
    """Keeps a mapping of states and tracks which key is the current one."""
    # mapping of state key -> object associated with that state
    states = attr.ib()
    # key of the state the machine starts in
    init_state = attr.ib()
    # key of the current state; not an init argument, it starts as init_state
    _current = attr.ib(init=False, default=attr.Factory(lambda instance: instance.init_state, takes_self=True))

    @property
    def current(self):
        """Key of the current state."""
        return self._current

    def update(self, *args, **kwargs):
        """Switch the current state, optionally registering it first.

        With two or more positional arguments, args[1] is stored under key args[0], which
        then becomes the current state. With exactly one, args[0] must be an existing key
        and becomes the current state. With none, nothing happens. Keyword arguments are ignored.

        Raises:
            RuntimeError: if a single key is given that is not among the existing states
        """
        if not args:
            return
        if len(args) == 1:
            if args[0] not in self.states:
                raise RuntimeError(f"Requested to set the current state to '{args[0]}', "
                                   f"it is not in existing [{', '.join(sorted(self.states))}]")
            self._current = args[0]
            return
        # two or more arguments: register (or overwrite) the state, then switch to it
        self.states[args[0]] = args[1]
        self._current = args[0]

    @property
    def state(self):
        """Construct an object representing the current state"""
        current_key = self._current
        return FeatureState(current_key, self.states[current_key])
146
147
148
@attr.s
class TrackingFeature:
    """A feature bundled with a state machine that selects which callable extracts its values."""
    # the wrapped feature; its `label` attribute is exposed through label()
    feature = attr.ib(init=True)
    # state machine whose current state's reporter is called by values()
    state_machine = attr.ib(init=True)
    # optional variable type of the feature
    variable_type = attr.ib(init=True, default=None)

    @classmethod
    def from_callable(cls, a_callable, label=None, variable_type=None):
        """Construct a feature that has one extract/report capability.

        The callable is wrapped in a FeatureFunction and registered as the single 'raw'
        state of a new StateMachine.
        Input id is correlated to the feature's position on the vector (see FeatureFunction above).
        """
        # Build through `cls` so that calling this on a subclass returns a subclass instance.
        return cls(FeatureFunction(a_callable, label), StateMachine({'raw': a_callable}, 'raw'),
                   variable_type)

    def values(self, dataset):
        """Call the reporter of the current state on the dataset and return its result."""
        return self.state_machine.state.reporter(dataset)

    def label(self):
        """Return the label of the wrapped feature."""
        return self.feature.label

    @property
    def state(self):
        """Returns the current state"""
        return self.state_machine.state

    def update(self, *args, **kwargs):
        """Forward all arguments to the state machine's update method."""
        self.state_machine.update(*args, **kwargs)
174
175
176
@attr.s
class FeatureIndex:
    """Holds a list of keys; the list type is enforced when the instance is created."""
    # must be a list, checked by _list_validator
    keys = attr.ib(validator=_list_validator)
179
180
181
class PhiFeatureFunction:
    """Callable placeholder: calling an instance always raises NotImplementedError."""

    def __call__(self, *args, **kwargs):
        raise NotImplementedError()
184