1
|
|
|
import attr |
2
|
|
|
from .features import AttributeReporter |
3
|
|
|
|
4
|
|
|
|
5
|
|
|
@attr.s
class DatapointsAttributePhi:
    """Wrap a datapoints object and expose per-attribute column lookup.

    The wrapped object is supplied as the single constructor argument and is
    expected to provide a ``column(attribute)`` method.
    """

    # The wrapped datapoints object (the only constructor argument).
    datapoints = attr.ib()

    def _extract(self, attribute):
        """Return the result of ``self.datapoints.column(attribute)``."""
        column_of = self.datapoints.column
        return column_of(attribute)
11
|
|
|
|
12
|
|
|
|
13
|
|
|
def _values_set(list_to_nominal):
    """Return the values set of the encoder's target attribute.

    Args:
        list_to_nominal: object exposing ``attribute_reporter`` and
            ``datapoints_attribute_phi.datapoints``.

    Returns:
        Whatever ``attribute_reporter.values_set(datapoints)`` returns.

    Raises:
        RuntimeError: if ``str(attribute_reporter)`` is not among the entries
            returned by ``datapoints.get_categorical_attributes()``.
    """
    reporter = list_to_nominal.attribute_reporter
    variable_name = str(reporter)
    datapoints = list_to_nominal.datapoints_attribute_phi.datapoints

    # Happy path first: the variable is registered as categorical.
    if variable_name in datapoints.get_categorical_attributes():
        return reporter.values_set(datapoints)

    raise RuntimeError(
        "Requested to use the 'list_to_nominal' encoder, but the given variable "
        f"'{variable_name}', seems to not belong in the categorical "
        "variables of the structured data (so can't be nominal as well)."
    )
20
|
|
|
|
21
|
|
|
|
22
|
|
|
@attr.s
class ListOfCategoricalPhi:
    """Encode datapoints as 0/1 membership vectors over an attribute's values.

    Assigning to ``attribute`` builds an ``AttributeReporter``, collects the
    attribute's values via ``_values_set`` and stores them in sorted order.
    Calling the instance then yields, per datapoint, a list holding 1 where
    the corresponding value is ``in`` the datapoint and 0 otherwise.
    """

    # Wrapper whose ``datapoints`` attribute supplies the rows to encode.
    datapoints_attribute_phi = attr.ib(init=True)
    # Reporter for the targeted attribute; None until one is assigned.
    attribute_reporter = attr.ib(init=False, default=None)
    # Factories give every instance its own container; a literal set()/list()/
    # dict default would be a single object shared by all instances.
    _set = attr.ib(init=False, default=attr.Factory(set))
    _ordering = attr.ib(init=False, default=attr.Factory(list))
    _binary_transformer = attr.ib(init=False, default=attr.Factory(lambda: {True: 1, False: 0}))

    @property
    def attribute(self):
        """Return ``str(self.attribute_reporter)``."""
        return str(self.attribute_reporter)

    @attribute.setter
    def attribute(self, attribute):
        """Target ``attribute`` and refresh the cached values set and ordering.

        Raises:
            RuntimeError: propagated from ``_values_set`` when the attribute
                is not among the datapoints' categorical attributes.
        """
        self.attribute_reporter = AttributeReporter(attribute)
        self._set = _values_set(self)
        self._ordering = list(ListOfCategoricalPhi._order(self._set))

    def __call__(self, *args, **kwargs):
        """Return an iterator of 0/1 lists, one list per datapoint.

        Args:
            attribute (str): the attribute we wish to target for input to the phi function

        Returns:
            A lazy iterator; each item is a list aligned with the stored
            ordering, holding 1 where the value is ``in`` the datapoint and
            0 otherwise.

        Raises:
            IndexError: if no positional argument is given.
        """
        # NOTE(review): this stores the raw argument and bypasses the
        # ``attribute`` setter, so ``_set``/``_ordering`` are NOT refreshed
        # here -- confirm whether ``self.attribute = args[0]`` was intended.
        self.attribute_reporter = args[0]
        # A generator expression is already an iterator; no iter() wrapper needed.
        # NOTE(review): membership relies on what ``iterrows()`` yields -- verify
        # that each yielded item supports ``value in item`` as intended.
        return (
            [self._binary_transformer[value in datapoint] for value in self._ordering]
            for datapoint in self.datapoints_attribute_phi.datapoints.iterrows()
        )

    @staticmethod
    def _order(values_iterable):
        """Return an iterator over ``values_iterable`` in ascending sorted order."""
        return iter(sorted(values_iterable))
52
|
|
|
|