1
|
|
|
from abc import ABC, abstractmethod |
2
|
|
|
import attr |
3
|
|
|
from so_magic.data.variables.types import VariableTypeFactory |
4
|
|
|
|
5
|
|
|
|
6
|
|
|
class AttributeReporterInterface(ABC):
    """Contract for objects able to report on one attribute/variable of structured data (observations).

    A concrete reporter has to provide all three operations declared here:
    `values`, `variable_type` and `value_set`.
    """

    @abstractmethod
    def values(self, datapoints, attribute, **kwargs):
        """Report the values that all datapoints (N x D) hold for the requested variable/attribute.

        Args:
            datapoints (Datapoints): the observations to read from
            attribute (str): the attribute/variable whose values are requested

        Return:
            (numpy.ndarray): the values in a [N x 1] vector
        """
        raise NotImplementedError

    @abstractmethod
    def variable_type(self, datapoints, attribute, **kwargs):
        """Report the variable type of the requested attribute, as observed in the datapoints.

        Args:
            datapoints (Datapoints): the observations to inspect
            attribute (str): the attribute/variable whose type is requested

        Return:
            (str): the variable type
        """
        raise NotImplementedError

    @abstractmethod
    def value_set(self, datapoints, attribute, **kwargs):
        """Report the value set of the requested attribute, as observed in the datapoints.

        Args:
            datapoints (Datapoints): the observations to inspect
            attribute (str): the attribute/variable whose value set is requested
        """
        raise NotImplementedError
39
|
|
|
|
40
|
|
|
|
41
|
|
|
class BaseAttributeReporter(AttributeReporterInterface):
    """Default reporter: answers every query by asking the datapoints object (or VariableTypeFactory)."""

    def values(self, datapoints, attribute, **kwargs):
        """Return whatever the datapoints object yields when subscripted with the attribute."""
        attribute_values = datapoints[attribute]
        return attribute_values

    def variable_type(self, datapoints, attribute, **kwargs):
        """Return the type inferred by VariableTypeFactory.infer; extra keyword arguments are forwarded."""
        inferred_type = VariableTypeFactory.infer(datapoints, attribute, **kwargs)
        return inferred_type

    def value_set(self, datapoints, attribute, **kwargs):
        """Return the distinct items of datapoints.column(attribute) as a Python set."""
        column = datapoints.column(attribute)
        return set(column)
51
|
|
|
|
52
|
|
|
|
53
|
|
|
#### HELPERS |
54
|
|
|
def _list_validator(_self, _attribute, value): |
55
|
|
|
if not isinstance(value, list): |
56
|
|
|
raise ValueError(f'Expected a list; instead a {type(value).__name__} was given.') |
57
|
|
|
|
58
|
|
|
|
59
|
|
|
def _string_validator(_self, _attribute, value): |
60
|
|
|
if not isinstance(value, str): |
61
|
|
|
raise ValueError(f'Expected a string; instead a {type(value).__name__} was given.') |
62
|
|
|
|
63
|
|
|
|
64
|
|
|
@attr.s
class AttributeReporter:
    """Pairs an attribute label with a reporter, so callers only need to supply the datapoints.

    Every query is delegated to the reporter, with `self.label` passed as the attribute.
    """
    # the attribute the queries are about; also what __str__ returns
    label = attr.ib()
    # object answering the queries; the default BaseAttributeReporter instance is created once, at class definition
    reporter = attr.ib(default=BaseAttributeReporter())

    def values(self, datapoints):
        """Ask the reporter for the values of this attribute in the given datapoints."""
        reporter = self.reporter
        return reporter.values(datapoints, self.label)

    def variable_type(self, datapoints):
        """Ask the reporter for the variable type of this attribute in the given datapoints."""
        reporter = self.reporter
        return reporter.variable_type(datapoints, self.label)

    def value_set(self, datapoints):
        """Ask the reporter for the value set of this attribute in the given datapoints."""
        reporter = self.reporter
        return reporter.value_set(datapoints, self.label)

    def __str__(self):
        """Return the label itself."""
        return self.label
82
|
|
|
|
83
|
|
|
|
84
|
|
|
@attr.s
class FeatureState:
    """A (key, reporter) pair describing one state of a feature."""
    # identifier of the state; also what __str__ returns
    key = attr.ib()
    # the object associated with the state (in this file: a callable applied on a dataset)
    reporter = attr.ib()

    def __str__(self):
        """Return the state's key."""
        return self.key
91
|
|
|
|
92
|
|
|
|
93
|
|
|
def is_callable(_self, _attribute, value):
    """Validator (instance, attribute, value signature) accepting callables that can take a positional argument.

    The arity check relies on `inspect.signature`, so it works for plain functions, lambdas, bound methods and
    objects defining `__call__`. The previously used `func_code` attribute exists only in Python 2.

    Args:
        _self: the instance being validated (unused)
        _attribute: the attribute being validated (unused)
        value: the object to check

    Raises:
        ValueError: if value is not callable, or if it cannot accept any positional argument
    """
    import inspect

    if not callable(value):
        raise ValueError(f"Expected a callable object; instead {type(value)} was given.")

    try:
        parameters = inspect.signature(value).parameters.values()
    except (TypeError, ValueError):
        # Some C-implemented callables expose no introspectable signature; their arity cannot be verified,
        # so they are given the benefit of the doubt instead of being rejected.
        return

    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
    )
    if not any(parameter.kind in positional_kinds for parameter in parameters):
        raise ValueError("Expected a callable that takes at least 1 argument; "
                         "instead a callable that takes no arguments was given.")
99
|
|
|
|
100
|
|
|
@attr.s
class FeatureFunction:
    """A labeled callable that is applied on a dataset.

    Example: Assume we have a datapoint v = [v_1, v_2, .., v_n] and 2 feature functions f_1, f_2.
    Then we can produce an encoded vector (eg to feed for training a ML model) like:
    encoded_vector = [f_1(v), f_2(v)]
    """
    # the wrapped callable; checked by the is_callable validator
    function = attr.ib(init=True, validator=is_callable)

    # optional label; when omitted (None) it is derived from the callable, see is_label
    label = attr.ib(init=True, default=None)

    @label.validator
    def is_label(self, _attribute, value):
        """Fall back to the callable's name when no label was given.

        Uses `__name__` (the Python 3 spelling; `func_name` exists only in Python 2). Callables without a
        `__name__` (eg instances defining `__call__`) are labeled with the name of their type.
        """
        if value is None:
            self.label = getattr(self.function, '__name__', type(self.function).__name__)

    def values(self, dataset):
        """Apply the wrapped callable on the dataset and return its result."""
        return self.function(dataset)

    @property
    def state(self):
        """Construct a FeatureState keyed by the label and carrying the wrapped callable."""
        return FeatureState(self.label, self.function)
119
|
|
|
|
120
|
|
|
|
121
|
|
|
@attr.s
class StateMachine:
    """Keeps a mapping of states along with the key of the state considered current."""
    # mapping of state key -> object associated with the state
    states = attr.ib()
    # key of the state that is current right after construction
    init_state = attr.ib()
    # key of the current state; not settable through __init__, it starts out as init_state
    _current = attr.ib(
        init=False,
        default=attr.Factory(lambda self: self.init_state, takes_self=True),
    )

    @property
    def current(self):
        """The key of the current state."""
        return self._current

    def update(self, *args, **kwargs):
        """Change the current state, optionally registering it first.

        - update(key, value): stores value under key (overwriting any existing entry) and makes key current
        - update(key): makes the already registered key current
        - update(): does nothing

        Keyword arguments are accepted but not used.

        Raises:
            RuntimeError: if a single key is given and it is not among the registered states
        """
        if not args:
            return
        if len(args) == 1 and args[0] not in self.states:
            raise RuntimeError(f"Requested to set the current state to '{args[0]}', "
                               f"it is not in existing [{', '.join(sorted(self.states))}]")
        if len(args) > 1:
            self.states[args[0]] = args[1]
        self._current = args[0]

    @property
    def state(self):
        """Construct an object representing the current state"""
        current_key = self._current
        return FeatureState(current_key, self.states[current_key])
146
|
|
|
|
147
|
|
|
|
148
|
|
|
@attr.s
class TrackingFeature:
    """A feature coupled with a state machine that determines how its values are currently computed."""
    # the underlying feature; must expose a `label` attribute (see the label method)
    feature = attr.ib(init=True)
    # state machine whose current state's reporter is called to compute values
    state_machine = attr.ib(init=True)
    # optional variable type of the feature; stored as given
    variable_type = attr.ib(init=True, default=None)

    @classmethod
    def from_callable(cls, a_callable, label=None, variable_type=None):
        """Construct a feature that has one extract/report capability.

        The callable is wrapped in a FeatureFunction and also registered as the single, initial 'raw' state
        of a new StateMachine.

        Args:
            a_callable (callable): computes the feature values, given a dataset
            label (str, optional): label for the feature; FeatureFunction derives one when None
            variable_type (optional): stored as the variable_type attribute

        Return:
            an instance of the class the method is called on (so subclasses get instances of themselves)
        """
        return cls(FeatureFunction(a_callable, label), StateMachine({'raw': a_callable}, 'raw'), variable_type)

    def values(self, dataset):
        """Call the reporter of the current state on the dataset and return its result."""
        return self.state_machine.state.reporter(dataset)

    def label(self):
        """Return the label of the underlying feature (note: a method, not a property)."""
        return self.feature.label

    @property
    def state(self):
        """Returns the current state"""
        return self.state_machine.state

    def update(self, *args, **kwargs):
        """Forward all arguments to the state machine's update method."""
        self.state_machine.update(*args, **kwargs)
174
|
|
|
|
175
|
|
|
|
176
|
|
|
@attr.s
class FeatureIndex:
    """Holds a list of keys."""
    # must be a list instance; anything else is rejected by _list_validator with a ValueError
    keys = attr.ib(validator=_list_validator)
179
|
|
|
|
180
|
|
|
|
181
|
|
|
class PhiFeatureFunction:
    """Callable whose call is not implemented here.

    NOTE(review): presumably a base for concrete feature functions that override __call__ -- confirm.
    """

    def __call__(self, *args, **kwargs):
        """Always raises NotImplementedError, whatever the arguments."""
        raise NotImplementedError
184
|
|
|
|