Passed
Push — dev ( 5faf7b...5a9f48 )
by Konstantinos
01:22
created

TabularData.get_categorical_attributes()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 2
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
import os
2
import attr
3
from green_magic.utils import Observer, Subject
4
5
6
@attr.s(str=True, repr=True)
class Dataset:
    """A named wrapper around a datapoints object and its list of features.

    Args:
        datapoints: the wrapped datapoints object; when it is truthy its
            length is recorded as the dataset size, otherwise the size is 0
        name: optional name of the dataset
        features: initial features (stored on the private `_features`
            attribute); every instance gets its own empty list when omitted
        handler: optional handler object; this class only stores it
    """
    datapoints = attr.ib(init=True)
    name = attr.ib(init=True, default=None)

    # A Factory builds a new list per instance. A literal `[]` default is a
    # single object shared by every instance created without `features`.
    _features = attr.ib(init=True, default=attr.Factory(list))
    handler = attr.ib(init=True, default=None)
    # Evaluated once, while the instance is being constructed.
    size = attr.ib(init=False, default=attr.Factory(lambda self: len(self.datapoints) if self.datapoints else 0, takes_self=True))

    @property
    def features(self):
        """The features currently attached to this dataset."""
        return self._features

    @features.setter
    def features(self, features):
        self._features = features
22
23
    # @classmethod
24
    # def from_file(cls, file_path, name):
25
    #     return Dataset(Datapoints.from_file(file_path), name)
26
27
from abc import ABC, abstractmethod
28
29
class DatapointsInterface(ABC):
    """The Datapoints interface gives access to the 'observations' property."""

    @property
    @abstractmethod
    def observations(self):
        """The observations exposed by the implementing class."""
        raise NotImplementedError
35
36
class StructuredDataInterface(ABC):
    """The StructuredData interface gives access to the 'attributes' property."""

    @property
    @abstractmethod
    def attributes(self):
        """The attributes exposed by the implementing class."""
        raise NotImplementedError
41
42
43
class DatapointsFactory:
    """Registry of named constructors plus a factory method that uses it.

    Classes are registered under a string name with the
    `register_constructor` decorator; `create` instantiates the class that
    is registered under the requested name.
    """
    # Maps a registered name to its class. It is a class attribute, so
    # subclasses that do not rebind it share the same registry.
    constructors = {}

    @classmethod
    def register_constructor(cls, name):
        """Return a class decorator that registers the class under `name`.

        The decorated class is returned unchanged. Registering under a name
        that is already taken replaces the earlier entry.
        """
        def wrapper(subclass):
            cls.constructors[name] = subclass
            return subclass
        return wrapper

    @classmethod
    def create(cls, name, *args, **kwargs) -> DatapointsInterface:
        """Instantiate the class registered under `name`.

        Args:
            name: key of a previously registered constructor
            *args: positional arguments forwarded to the constructor
            **kwargs: keyword arguments forwarded to the constructor

        Returns:
            the object built by the registered constructor

        Raises:
            ValueError: if nothing is registered under `name`
            SystemExit: with exit code 1, if the constructor raises TypeError
        """
        if name not in cls.constructors:
            raise ValueError(
                f"Request Engine of type '{name}'; supported are [{', '.join(sorted(cls.constructors))}]")
        try:
            return cls.constructors[name](*args, **kwargs)
        except TypeError as e:
            import sys
            # Diagnostics are written to stderr so that they never mix with
            # the regular output the program writes to stdout.
            print(f"Datapoints creation failed. Args: [{', '.join(f'{i}: {arg!s}' for i, arg in enumerate(args))}]",
                  file=sys.stderr)
            print(f"Kwargs: [{', '.join(f'{k}: {v}' for k, v in kwargs.items())}]", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
66
67
68
class BroadcastingDatapointsFactory(DatapointsFactory):
    """Datapoints factory that publishes every created object through a Subject."""
    # One Subject instance, created at class-definition time and shared by
    # all calls to `create`.
    subject = Subject()

    @classmethod
    def create(cls, name, *args, **kwargs) -> DatapointsInterface:
        """Create a Datapoints object and notify the subject about it.

        The new object becomes the subject's `state`. The subject's `name` is
        taken from the 'id' keyword argument, else from the 'name' keyword
        argument, else it is the empty string.

        Args:
            name: key of a registered constructor
            *args: positional arguments forwarded to the constructor
            **kwargs: keyword arguments forwarded to the constructor

        Returns:
            the newly created object (the subject's state)
        """
        cls.subject.state = super().create(name, *args, **kwargs)
        cls.subject.name = kwargs.get('id', kwargs.get('name', ''))
        # NOTE(review): this branch used to be guarded by
        # `not hasattr(cls, '.name')`. '.name' is not a reachable attribute
        # name, so in practice the guard never blocked anything; it is
        # dropped here and the behaviour is unchanged. Possibly the subject's
        # name was meant to fall back on this value -- confirm the intent.
        if args:
            cls.name = getattr(args[0], 'name', '')
        cls.subject.notify()
        return cls.subject.state
79
80
81
@attr.s
@DatapointsFactory.register_constructor('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Structured data. There are specific attributes/variables per observation.

    Args:
        observations (object): a reference to an object that encapsulates structured data
        attributes (iterable): the attributes/variables; copied into a new list at construction
    """
    _observations = attr.ib(init=True)
    # `list` copies any iterable into a fresh list.
    _attributes = attr.ib(init=True, converter=list)

    @property
    def attributes(self):
        """The list of attributes built at construction time."""
        return self._attributes

    @property
    def observations(self):
        """The object passed in as `observations`."""
        return self._observations
99
100
@attr.s
@DatapointsFactory.register_constructor('tabular-data')
class TabularData(StructuredData):
    """Table-like datapoints that are loaded in memory.

    Every query is delegated to one of three collaborator objects, each of
    which receives this instance as an argument.

    Args (in addition to the fields inherited from StructuredData):
        retriever: object answering column/row lookups, numerical attributes
            and the table dimensions
        iterator: object providing row-wise and column-wise iteration
        reporter: object providing the column names
    """
    retriever = attr.ib(init=True)
    iterator = attr.ib(init=True)
    reporter = attr.ib(init=True)

    @property
    def attributes(self):
        """The column names, as reported by the reporter."""
        return self.reporter.column_names(self)

    def column(self, identifier):
        """Return whatever the retriever yields for column `identifier`."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Return whatever the retriever yields for row `identifier`."""
        return self.retriever.row(identifier, self)

    def get_numerical_attributes(self):
        """Return the numerical attributes, as reported by the retriever."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Iterate over the attributes that are not reported as numerical.

        Returns:
            iterator: the non-numerical attributes, each one once, in the
                order in which they appear in `attributes`
        """
        # dict.fromkeys removes duplicates while keeping the column order;
        # iterating a set difference gives an order that can vary per run.
        unique_attributes = dict.fromkeys(self.attributes)
        numerical = set(self.retriever.get_numerical_attributes(self))
        return iter([attribute for attribute in unique_attributes if attribute not in numerical])

    @property
    def nb_columns(self):
        """The number of columns, as reported by the retriever."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """The number of rows, as reported by the retriever."""
        return self.retriever.nb_rows(self)

    def __len__(self):
        return self.retriever.nb_rows(self)

    def __iter__(self):
        return self.iterator.iterrows(self)

    def iterrows(self):
        """Return the row-wise iteration provided by the iterator object."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Return the column-wise iteration provided by the iterator object."""
        return self.iterator.itercolumns(self)
143
144
145
@attr.s
class DatapointsManager(Observer):
    """Observer that stores, by key, the Datapoints objects it is notified about.

    Args:
        datapoints_objects (dict): initial key -> Datapoints object mapping;
            every manager gets its own empty dict when omitted
    """
    # A Factory builds a new dict per instance. A literal `{}` default is a
    # single object shared by every manager created without this argument.
    datapoints_objects = attr.ib(init=True, default=attr.Factory(dict))
    _last_key = attr.ib(init=False, default='')

    def update(self, subject: Subject):
        """Register the subject's state under the subject's name.

        The key is the subject's `name` attribute, or the empty string when
        the subject has no such attribute.

        Raises:
            RuntimeError: if an object is already registered under the key
        """
        datapoints_object = subject.state
        key = getattr(subject, 'name', '')
        if key in self.datapoints_objects:
            raise RuntimeError(f"Attempted to register a new Datapoints object at the existing key '{key}'.")
        self.datapoints_objects[key] = datapoints_object
        self._last_key = key

    @property
    def state(self):
        """The key of the most recently registered object ('' if none yet)."""
        return self._last_key

    @property
    def datapoints(self):
        """The most recently registered Datapoints object.

        Returns:
            the object stored under the last key, or None (after printing a
            message) when nothing is stored under that key
        """
        try:
            return self.datapoints_objects[self._last_key]
        except KeyError as e:
            # str() keeps the message printable when keys are not strings.
            print(f"{e}. Requested datapoints with id '{self._last_key}', but was not found in registered [{', '.join(str(k) for k in self.datapoints_objects)}]")
            return None
171