|
1
|
|
|
import attr |
|
2
|
|
|
from green_magic.data.encoding import Encoder |
|
3
|
|
|
from .phi import PhiFunction |
|
4
|
|
|
from .features import AttributeReporter |
|
5
|
|
|
|
|
6
|
|
|
class DatapointsEncoder(Encoder):
    """Encoder stub whose ``encode`` hook takes datapoints and an attribute.

    The hook is currently a placeholder that performs no work.
    """

    def encode(self, datapoints, attribute, **kwargs):
        """Accept the arguments, do nothing and return ``None``."""
        return None
|
9
|
|
|
|
|
10
|
|
|
@attr.s
class DatapointsPhi:
    """Bind a phi callable to the datapoints object it is applied on.

    Attributes:
        phi: callable invoked as ``phi(datapoints, **kwargs)``.
        datapoints: object handed to ``phi`` as its first positional argument.
    """

    phi = attr.ib(init=True)
    datapoints = attr.ib(init=True)

    def __call__(self, **kwargs):
        """Call ``phi`` with the stored datapoints, forwarding ``kwargs``.

        Returns:
            Whatever ``self.phi`` returns.
        """
        # The field is declared as ``datapoints`` (no leading underscore), so
        # attrs stores it under that exact name; reading ``self._datapoints``
        # here raised AttributeError on every call.
        return self.phi(self.datapoints, **kwargs)
|
17
|
|
|
|
|
18
|
|
|
def _values_set(list_to_nominal):
    """Ask the encoder's attribute reporter for the values of its attribute.

    Args:
        list_to_nominal: object exposing ``attribute``, ``attribute_reporter``
            and ``phi.datapoints`` (e.g. a ``ListToNominal`` instance).

    Returns:
        The result of ``attribute_reporter.values_set(datapoints)``.

    Raises:
        RuntimeError: if ``attribute`` is not among the values returned by
            ``datapoints.get_categorical_attributes()``.
    """
    attribute = list_to_nominal.attribute
    datapoints = list_to_nominal.phi.datapoints
    if attribute not in datapoints.get_categorical_attributes():
        # The two literals are concatenated; the explicit space after the comma
        # keeps the message from reading "...',seems to not belong...".
        raise RuntimeError(
            f"Requested to use the 'list_to_nominal' encoder, but the given variable '{attribute}', "
            "seems to not belong in the categorical variables of the structured data (so can't be nominal as well)."
        )
    return list_to_nominal.attribute_reporter.values_set(datapoints)
|
23
|
|
|
|
|
24
|
|
|
|
|
25
|
|
|
@attr.s
@Encoder.register_as_subclass('list-to-nominal')
class ListToNominal(Encoder):
    """Encoder registered as 'list-to-nominal'.

    For every item yielded by ``datapoints.iterrows()`` it produces a list of
    0/1 flags, one per value of the sorted value set, telling whether that
    value is contained in the item.
    """

    _datapoints = attr.ib(init=True)
    attribute_reporter = attr.ib(init=True, converter=AttributeReporter)
    # Not an __init__ argument: wraps this instance's ``encode`` in a
    # PhiFunction and pairs it with the datapoints.
    phi = attr.ib(
        init=False,
        default=attr.Factory(
            lambda instance: DatapointsPhi(PhiFunction(instance.encode), instance._datapoints),
            takes_self=True,
        ),
    )
    # Optional __init__ argument; computed through ``_values_set`` when omitted.
    _set = attr.ib(
        init=True,
        default=attr.Factory(lambda instance: _values_set(instance), takes_self=True),
    )
    # NOTE(review): declared ``type=tuple`` but the factory builds a list.
    _ordering = attr.ib(
        init=False,
        default=attr.Factory(
            lambda instance: list(ListToNominal._order(instance._set)),
            takes_self=True,
        ),
        type=tuple,
    )

    def encode(self, *args, **kwargs):
        """Return an iterator of 0/1 lists, one list per iterated datapoint.

        Positional and keyword arguments are accepted but not used.
        """
        return (
            [int(value in datapoint) for value in self._ordering]
            for datapoint in self._datapoints.iterrows()
        )

    @property
    def order(self):
        """The ordered values that define the positions of the encoded flags."""
        return self._ordering

    @staticmethod
    def _order(x):
        """Return an iterator over the elements of ``x`` in sorted order."""
        ordered = sorted(x)
        return (element for element in ordered)

    @staticmethod
    def _phi(x):
        """Build a ``DatapointsPhi``; the argument ``x`` is not used."""
        # NOTE(review): DatapointsPhi declares two init attributes without
        # defaults, so this no-argument call looks like it cannot succeed.
        return DatapointsPhi()

    @property
    def attribute(self):
        """String form of the attribute reporter."""
        return str(self.attribute_reporter)

    @attribute.setter
    def attribute(self, attribute):
        """Switch to another attribute and recompute the value set and ordering."""
        self.attribute_reporter = AttributeReporter(attribute)
        self._set = _values_set(self)
        self._ordering = list(ListToNominal._order(self._set))

    @property
    def value_set(self):
        """The stored set of values for the current attribute."""
        return self._set