Completed
Push — appveyor ( 280314...2c0e2c )
by Konstantinos
02:09
created

TabularData.get_numerical_attributes()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
from abc import ABC, abstractmethod
2
import os
3
import attr
4
5
from so_magic.utils import Observer
6
7
8
@attr.s(str=True, repr=True)
class Dataset:
    """A named holder of datapoints together with the features defined on them.

    Args:
        datapoints: the object holding the observations of the dataset
        name: optional name of the dataset; defaults to None
        features: optional features of the dataset (attrs exposes the private '_features'
            attribute as the 'features' init argument); defaults to a new empty list
    """
    datapoints = attr.ib(init=True)
    name = attr.ib(init=True, default=None)

    # attr.Factory(list) builds a fresh list per instance; a literal [] default
    # would be one list object shared (and mutated) by every Dataset instance.
    _features = attr.ib(init=True, default=attr.Factory(list))
    # Computed once at construction time: len(datapoints), or 0 when datapoints is falsy.
    size = attr.ib(init=False, default=attr.Factory(lambda self: len(self.datapoints) if self.datapoints else 0, takes_self=True))

    @property
    def features(self):
        """The features currently associated with the dataset."""
        return self._features

    @features.setter
    def features(self, features):
        self._features = features
23
24
25
class DatapointsInterface(ABC):
    """The Datapoints interface gives access to the 'observations' property."""

    @property
    @abstractmethod
    def observations(self):
        """The observations held by the implementing object; concrete classes must override this."""
        raise NotImplementedError
31
32
class StructuredDataInterface(ABC):
    """The StructuredData interface gives access to the 'attributes' property."""

    @property
    @abstractmethod
    def attributes(self):
        """The attributes of the implementing object; concrete classes must override this."""
        raise NotImplementedError
37
38
39
class DatapointsFactory:
    """Registry-backed factory of Datapoints objects.

    Constructors (usually classes) are registered under a string name with the
    'register_constructor' decorator and later instantiated through 'create'.
    """
    # Class-level registry (name -> constructor); shared by this class and its subclasses.
    constructors = {}

    @classmethod
    def register_constructor(cls, name):
        """Return a class decorator that registers the decorated object under 'name'.

        Args:
            name (str): the key under which the constructor is stored in the registry

        Returns:
            callable: a decorator that stores its argument in the registry and returns it unchanged
        """
        def wrapper(subclass):
            cls.constructors[name] = subclass
            return subclass
        return wrapper

    @classmethod
    def create(cls, name, *args, **kwargs) -> "DatapointsInterface":
        """Build an object using the constructor registered under 'name'.

        Args:
            name (str): the key of a previously registered constructor
            *args: positional arguments forwarded to the constructor
            **kwargs: keyword arguments forwarded to the constructor

        Returns:
            the object returned by the registered constructor

        Raises:
            ValueError: if no constructor is registered under 'name'
            SystemExit: if the constructor raises a TypeError (exit code 1)
        """
        if name not in cls.constructors:
            raise ValueError(
                f"Requested Datapoints of type '{name}'; supported are [{', '.join(sorted(cls.constructors))}]")
        try:
            return cls.constructors[name](*args, **kwargs)
        except TypeError as e:
            print(f"Datapoints creation failed. Args: [{', '.join(f'{i}: {arg!s}' for i, arg in enumerate(args))}]")
            print(f"Kwargs: [{', '.join(f'{k}: {v}' for k, v in kwargs.items())}]")
            print(e)
            # NOTE(review): terminating the interpreter from library code is harsh; it is kept
            # because callers may depend on it, but re-raising would be friendlier.
            import sys
            sys.exit(1)
62
63
@attr.s
class BroadcastingDatapointsFactory(DatapointsFactory):
    """Datapoints factory that publishes every created object through its subject.

    Args:
        subject: object that receives the 'name' and 'state' attributes and whose
            'notify' method is called after each successful creation
    """
    subject = attr.ib(init=True)

    def create(self, datapoints_factory_type, *args, **kwargs) -> DatapointsInterface:
        """Create a Datapoints object, store it as the subject's state and notify.

        Args:
            datapoints_factory_type (str): the key of a registered constructor
            *args: positional arguments forwarded to the constructor
            **kwargs: only 'id', 'name' and 'file_path' are accepted; they are consumed
                here to name the subject and are not forwarded to the constructor

        Returns:
            the newly created object (also available as the subject's state)

        Raises:
            RuntimeError: if any keyword argument other than the three naming keys is given
        """
        # All three pops execute (innermost first), so every naming key is removed from kwargs;
        # the value used is the first one present in the order 'id', 'name', 'file_path'.
        self.subject.name = kwargs.pop('id', kwargs.pop('name', kwargs.pop('file_path', '')))
        if kwargs:
            msg = f"Kwargs: [{', '.join(f'{k}: {v}' for k, v in kwargs.items())}]"
            raise RuntimeError("The 'create' method of DatapointsFactory does not support kwargs:", msg)
        # kwargs is guaranteed empty at this point, so only positional arguments are forwarded.
        self.subject.state = super().create(datapoints_factory_type, *args)
        # NOTE(review): the original guard was `not hasattr(self, '.name')`, which is effectively
        # always true since no attribute is literally named '.name'; the resulting behaviour
        # (refresh the factory's own 'name' on every call with positional args) is preserved.
        # Confirm whether the intent was to fall back on the subject's name instead.
        if args:
            self.name = getattr(args[0], 'name', '')
        self.subject.notify()
        return self.subject.state
77
78
79
@attr.s
@DatapointsFactory.register_constructor('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Structured data. There are specific attributes/variables per observation.

    Args:
        observations (object): a reference to an object that encapsulates structured data
        attributes (iterable): the attributes/variables of the observations; copied into a new list
    """
    _observations = attr.ib(init=True)
    # list(...) makes a shallow copy of any iterable, exactly like the identity comprehension it replaces
    _attributes = attr.ib(init=True, converter=list)

    @property
    def attributes(self):
        """The list of attributes given at construction."""
        return self._attributes

    @property
    def observations(self):
        """The wrapped object that encapsulates the structured data."""
        return self._observations

    @observations.setter
    def observations(self, observations):
        self._observations = observations
101
102
@attr.s
@DatapointsFactory.register_constructor('tabular-data')
class TabularData(StructuredData):
    """Table-like datapoints that are loaded in memory.

    Every operation is delegated to a collaborator object, which receives this
    instance as an argument.

    Args:
        retriever: provides 'column', 'row', 'get_numerical_attributes', 'nb_columns' and 'nb_rows'
        iterator: provides 'columnnames', 'iterrows' and 'itercolumns'
        mutator: stored on the instance; not used by any method of this class
    """
    retriever = attr.ib(init=True)
    iterator = attr.ib(init=True)
    mutator = attr.ib(init=True)

    @property
    def attributes(self):
        """The column names as reported by the iterator (overrides the stored attributes list)."""
        return self.iterator.columnnames(self)

    def column(self, identifier):
        """Return what the retriever yields for the column with the given identifier."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Return what the retriever yields for the row with the given identifier."""
        return self.retriever.row(identifier, self)

    def get_numerical_attributes(self):
        """Return the numerical attributes as reported by the retriever."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Return an iterator over the attributes that the retriever does not report as numerical.

        The result comes from a set difference, so its iteration order is not defined.
        """
        numerical = set(self.retriever.get_numerical_attributes(self))
        return iter(set(self.attributes) - numerical)

    @property
    def nb_columns(self):
        """The number of columns as reported by the retriever."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """The number of rows as reported by the retriever."""
        return self.retriever.nb_rows(self)

    def __len__(self):
        return self.retriever.nb_rows(self)

    def __iter__(self):
        return self.iterator.iterrows(self)

    def iterrows(self):
        """Return the row iteration provided by the iterator."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Return the column iteration provided by the iterator."""
        return self.iterator.itercolumns(self)
145
146
147
@attr.s
class DatapointsManager(Observer):
    """Observer that stores every broadcast Datapoints object under its subject's name.

    Args:
        datapoints_objects (dict): optional initial mapping of key to Datapoints object;
            defaults to a new empty dict per instance
    """
    # attr.Factory(dict) gives each manager its own registry; a literal {} default
    # would be a single dict shared by all DatapointsManager instances.
    datapoints_objects = attr.ib(init=True, default=attr.Factory(dict))
    _last_key = attr.ib(init=False, default='')

    def update(self, subject):
        """Register the subject's state under the subject's name.

        Args:
            subject: object with a 'state' attribute and, normally, a non-empty 'name' attribute

        Raises:
            RuntimeError: if the subject's name is missing/empty or is already registered
        """
        datapoints_object = subject.state
        key = getattr(subject, 'name', '')
        if key == '':
            # The message formats the received 'subject'; the original referenced the
            # undefined name 'Subject', which raised NameError instead of this RuntimeError.
            raise RuntimeError(f"Subject {subject} with state {str(subject.state)} resulted in an empty string as key (to use in dict/hash).")
        if key in self.datapoints_objects:
            raise RuntimeError(f"Attempted to register a new Datapoints object at the existing key '{key}'.")
        self.datapoints_objects[key] = datapoints_object
        self._last_key = key

    @property
    def state(self):
        """The key of the most recently registered Datapoints object ('' if none yet)."""
        return self._last_key

    @property
    def datapoints(self):
        """The most recently registered Datapoints object.

        Returns None, after printing a diagnostic, when nothing is stored under the last key.
        """
        try:
            return self.datapoints_objects[self._last_key]
        except KeyError as e:
            print(f"{e}. Requested datapoints with id '{self._last_key}', but was not found in registered [{', '.join(self.datapoints_objects)}]")
            # Best-effort lookup: keep reporting the miss and returning None rather than raising.
            return None
172