Completed
Push — appveyor ( 280314...2c0e2c )
by Konstantinos
02:09
created

TrackingFeature.update()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 3
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
from abc import ABC, abstractmethod
2
import attr
3
from so_magic.data.variables.types import VariableTypeFactory
4
5
6
class AttributeReporter(ABC):
    """Interface for objects able to report information on an attribute/variable
    of some structured data (observations).

    Concrete implementations must provide the `values`, `variable_type` and
    `value_set` methods.
    """

    @abstractmethod
    def values(self, datapoints, attribute, **kwargs):
        """Call to get the values ([N x 1] vector) of all datapoints (N x D) corresponding to the input variable/attribute.

        Args:
            datapoints (Datapoints): the observations to read the values from
            attribute (str): the variable/attribute whose values are requested

        Return:
            (numpy.ndarray): the values in a [N x 1] vector
        """
        raise NotImplementedError

    @abstractmethod
    def variable_type(self, datapoints, attribute, **kwargs):
        """Call to get the variable type of the datapoints, given the attribute.

        Args:
            datapoints (Datapoints): the observations the attribute belongs to
            attribute (str): the variable/attribute whose type is requested

        Return:
            (str): the variable type of the attribute
        """
        raise NotImplementedError

    @abstractmethod
    def value_set(self, datapoints, attribute, **kwargs):
        """Call to get the collection of values the attribute takes in the datapoints.

        Args:
            datapoints (Datapoints): the observations to read the values from
            attribute (str): the variable/attribute whose values are requested

        Return:
            the values found for the attribute (presumably the distinct ones,
            as a set -- confirm against the concrete implementations)
        """
        raise NotImplementedError
39
40
41
class BaseAttributeReporter(AttributeReporter):
    """Straightforward reporter that reads everything directly from the datapoints object."""

    def values(self, datapoints, attribute, **kwargs):
        """Return the result of indexing the datapoints with the attribute."""
        return datapoints[attribute]

    def variable_type(self, datapoints, attribute, **kwargs):
        """Return the variable type as inferred by VariableTypeFactory.

        Any extra keyword arguments are forwarded to `VariableTypeFactory.infer`.
        """
        return VariableTypeFactory.infer(datapoints, attribute, **kwargs)

    def value_set(self, datapoints, attribute, **kwargs):
        """Return the set of distinct values yielded by `datapoints.column(attribute)`."""
        # set() consumes any iterable directly; no intermediate list is needed
        return set(datapoints.column(attribute))
51
52
53
#### HELPERS
54
def _list_validator(self, attribute, value):
55
    if not type(value) == list:
56
        raise ValueError(f'Expected a list; instead a {type(value).__name__} was given.')
57
58
def _string_validator(self, attribute, value):
59
    if not type(value) == str:
60
        raise ValueError(f'Expected a string; instead a {type(value).__name__} was given.')
61
62
63
# NOTE(review): this class re-uses the name of the abstract `AttributeReporter`
# interface defined earlier in this module and therefore shadows it from this
# point on -- confirm whether one of the two should be renamed.
@attr.s
class AttributeReporter:
    """Report on one specific attribute, identified by its label, by delegating to a reporter object.

    Attributes:
        label: the attribute/variable name passed to the reporter on every call
        reporter: object providing `values`, `variable_type` and `value_set`;
            a new BaseAttributeReporter is created per instance when omitted
    """
    label = attr.ib(init=True)
    # Factory builds the default lazily, per instance, instead of sharing one
    # object that was created when the class body was executed
    reporter = attr.ib(init=True, default=attr.Factory(BaseAttributeReporter))

    def values(self, datapoints):
        """A default implementation of the values method"""
        return self.reporter.values(datapoints, self.label)

    def variable_type(self, datapoints):
        """A default implementation of the variable_type method"""
        return self.reporter.variable_type(datapoints, self.label)

    def value_set(self, datapoints):
        """A default implementation of the value_set method"""
        return self.reporter.value_set(datapoints, self.label)

    def __str__(self):
        return self.label
81
82
@attr.s
class FeatureState:
    """Pair a state key with the reporter object associated with that state.

    Attributes:
        key: identifier of the state; also used as the string representation
        reporter: the object stored for this state
    """
    key = attr.ib(init=True)
    reporter = attr.ib(init=True)

    def __str__(self):
        return self.key
89
90
91
@attr.s
class FeatureFunction:
    """Wrap a callable that is applied on data, along with a label for it.

    Example: Assume we have a datapoint v = [v_1, v_2, .., v_n] and 2 feature functions f_1, f_2.
    Then we can produce an encoded vector (eg to feed for training a ML model) like: encoded_vector = [f_1(v), f_2(v)]

    Attributes:
        function: the wrapped callable; must accept at least one positional argument
        label: name of the feature; defaults to the name of the wrapped callable
    """
    function = attr.ib(init=True)

    @function.validator
    def is_callable(self, attribute, value):
        """Check that the value is a callable accepting at least one positional argument.

        Raises:
            ValueError: if the value is not callable or cannot take any positional argument
        """
        import inspect  # local import keeps this class self-contained

        if not callable(value):
            raise ValueError(f"Expected a callable object; instead {type(value)} was given.")
        try:
            signature = inspect.signature(value)
        except (TypeError, ValueError):
            # some callables (eg certain builtins) expose no signature; skip the arity check
            return
        try:
            # succeeds as soon as one positional argument can be accepted
            signature.bind_partial(None)
        except TypeError:
            raise ValueError("Expected a callable that takes at least 1 argument; instead a callable that takes no arguments was given.") from None

    label = attr.ib(init=True, default=None)

    @label.validator
    def is_label(self, attribute, value):
        """Default a missing label to the name of the wrapped callable."""
        if value is None:
            # callables without a __name__ (eg functools.partial objects) fall back to their type name
            self.label = getattr(self.function, '__name__', type(self.function).__name__)

    def values(self, dataset):
        """Return the result of calling the wrapped function on the dataset."""
        return self.function(dataset)

    @property
    def state(self):
        """A FeatureState built from the label and the wrapped function."""
        return FeatureState(self.label, self.function)
116
117
118
@attr.s
class StateMachine:
    """Keep a mapping of states and track which of them is the current one.

    Attributes:
        states: mapping of state key to the object stored for that state
        init_state: key of the state the machine starts in
    """
    states = attr.ib(init=True)
    init_state = attr.ib(init=True)
    # not an __init__ argument; starts out equal to init_state
    _current = attr.ib(init=False, default=attr.Factory(lambda self: self.init_state, takes_self=True))

    @property
    def current(self):
        """Key of the current state."""
        return self._current

    def update(self, *args, **kwargs):
        """Switch the current state, optionally registering it first.

        Called with (key, value) it stores value under key and makes key the
        current state. Called with (key,) it makes the existing key current.
        Called without positional arguments it does nothing. Keyword arguments
        are accepted but not used.

        Raises:
            RuntimeError: if a single key is given that is not among the states
        """
        if not args:
            return
        key = args[0]
        if 1 < len(args):
            self.states[key] = args[1]
        elif key not in self.states:
            # stringify the keys so that reporting never fails on non-string state keys
            existing = ', '.join(sorted(str(_) for _ in self.states))
            raise RuntimeError(f"Requested to set the current state to '{key}', it is not in existing [{existing}]")
        self._current = key

    @property
    def state(self):
        """Construct an object representing the current state"""
        return FeatureState(self._current, self.states[self._current])
142
143
144
@attr.s
class TrackingFeature:
    """A feature combined with a state machine that decides how its values are computed.

    Attributes:
        feature: object exposing a `label` attribute (eg a FeatureFunction)
        state_machine: object exposing `state` and `update` (eg a StateMachine)
        variable_type: optional variable type information; None by default
    """
    feature = attr.ib(init=True)
    state_machine = attr.ib(init=True)
    variable_type = attr.ib(init=True, default=None)

    @classmethod
    def from_callable(cls, a_callable, label=None, variable_type=None):
        """Construct a feature that has one extract/report capability. Input id is correlated to the features position on the vector (see FeatureFunction above)

        The callable is registered as the single 'raw' state, which is also the initial state.
        """
        # build through cls so that subclasses get instances of their own type
        return cls(FeatureFunction(a_callable, label), StateMachine({'raw': a_callable}, 'raw'), variable_type)

    def values(self, dataset):
        """Return the result of calling the current state's reporter on the dataset."""
        return self.state_machine.state.reporter(dataset)

    def label(self):
        """Return the label of the underlying feature."""
        return self.feature.label

    @property
    def state(self):
        """Returns the current state"""
        return self.state_machine.state

    def update(self, *args, **kwargs):
        """Forward all arguments to the state machine's update method."""
        self.state_machine.update(*args, **kwargs)
168
169
170
@attr.s
class FeatureIndex:
    """Hold a list of keys.

    Attributes:
        keys: the keys; checked by `_list_validator` on construction
    """
    keys = attr.ib(init=True, validator=_list_validator)
173
174
175
class PhiFeatureFunction:
    """Callable whose call is not implemented here.

    NOTE(review): presumably meant as a base for concrete feature functions
    that override `__call__` -- confirm against the rest of the package.
    """

    def __call__(self, *args, **kwargs):
        """Always raise NotImplementedError, whatever the arguments."""
        raise NotImplementedError