|
1
|
|
|
import attr |
|
2
|
|
|
from .features import AttributeReporter |
|
3
|
|
|
|
|
4
|
|
|
|
|
5
|
|
|
def _values_set(list_to_nominal):
    """Return the values the encoder's attribute reporter finds in the datapoints.

    Args:
        list_to_nominal: encoder-like object exposing ``attribute_reporter``
            (its ``str()`` is used as the attribute name and its
            ``values_set`` method is called) and ``datapoints`` (which must
            provide ``get_categorical_attributes()``).

    Returns:
        Whatever ``attribute_reporter.values_set(datapoints)`` returns.

    Raises:
        RuntimeError: if the attribute name is not among the names returned by
            ``datapoints.get_categorical_attributes()``.
    """
    attribute_name = str(list_to_nominal.attribute_reporter)
    if attribute_name not in list_to_nominal.datapoints.get_categorical_attributes():
        raise RuntimeError(
            "Requested to use the 'list_to_nominal' encoder, but the given variable "
            f"'{attribute_name}', seems to not belong in the categorical "
            "variables of the structured data (so can't be nominal as well).")
    # The guard above reads `datapoints` straight from the encoder, while the
    # original return went through `datapoints_attribute_phi`. Objects that
    # carry that wrapper keep using it; objects that only hold `datapoints`
    # no longer fail with AttributeError.
    datapoints_holder = getattr(list_to_nominal, 'datapoints_attribute_phi', list_to_nominal)
    return list_to_nominal.attribute_reporter.values_set(datapoints_holder.datapoints)
|
12
|
|
|
|
|
13
|
|
|
|
|
14
|
|
|
@attr.s
class ListOfCategoricalPhi:
    """Callable that turns each datapoint into a list of 0/1 membership flags.

    Assigning to ``attribute`` builds an ``AttributeReporter`` for it, collects
    the attribute's value set through ``_values_set`` and stores the values in
    sorted order. Calling the instance then produces, per datapoint, one flag
    for every value of that ordering.
    """

    # Object holding the datapoints; this class calls `iterrows()` on it and
    # `_values_set` calls `get_categorical_attributes()` on it.
    datapoints = attr.ib()
    # Reporter for the currently targeted attribute; None until one is set.
    attribute_reporter = attr.ib(init=False, default=None)
    # Mutable defaults are built per instance with attr.Factory; a bare
    # `default=set()` / `list()` / `{...}` is created once at class-definition
    # time and would be shared by every instance.
    _set = attr.ib(init=False, default=attr.Factory(set))
    _ordering = attr.ib(init=False, default=attr.Factory(list))
    _binary_transformer = attr.ib(init=False, default=attr.Factory(lambda: {True: 1, False: 0}))

    @property
    def attribute(self):
        """Return ``str()`` of the current attribute reporter."""
        return str(self.attribute_reporter)

    @attribute.setter
    def attribute(self, attribute):
        """Target a new attribute and recompute its value set and ordering.

        Args:
            attribute: value handed to ``AttributeReporter``.

        Raises:
            RuntimeError: propagated from ``_values_set`` when the attribute is
                not among the categorical attributes of the datapoints.
        """
        self.attribute_reporter = AttributeReporter(attribute)
        self._set = _values_set(self)
        self._ordering = list(ListOfCategoricalPhi._order(self._set))

    def __call__(self, *args, **kwargs):
        """Lazily produce one list of 0/1 flags per datapoint.

        Args:
            attribute (str): the attribute we wish to target for input to the phi function

        Returns:
            A generator yielding, for every item of ``self.datapoints.iterrows()``,
            a list with one entry per value in the stored ordering: 1 if the
            value is ``in`` that item, 0 otherwise.

        Raises:
            IndexError: if called without a positional argument.
        """
        # NOTE(review): this stores the argument directly instead of going
        # through the `attribute` setter, so `_set` and `_ordering` are not
        # refreshed here -- confirm whether that is intended.
        self.attribute_reporter = args[0]
        # TODO fix the below
        return ([self._binary_transformer[x in datapoint] for x in self._ordering]
                for datapoint in self.datapoints.iterrows())

    @staticmethod
    def _order(values_iterable):
        """Return an iterator over the given values in ascending sorted order."""
        return iter(sorted(values_iterable))
|
45
|
|
|
|