1
|
|
|
from abc import ABC, abstractmethod |
2
|
|
|
from typing import Iterable |
3
|
|
|
import attr |
4
|
|
|
|
5
|
|
|
from .tabular_data_interface import TabularDataInterface |
6
|
|
|
|
7
|
|
|
|
8
|
|
|
class DatapointsInterface(ABC):
    """Interface for objects that stand for a collection of data points.

    Any concrete class implementing this interface must give its instances
    an 'observations' property; the data points themselves are reached
    through that property.
    """

    @property
    @abstractmethod
    def observations(self) -> Iterable:
        """Return the collection of data points held by the instance."""
        raise NotImplementedError
24
|
|
|
|
25
|
|
|
|
26
|
|
|
class StructuredDataInterface(ABC):
    """Interface for data whose points share a known set of attributes.

    Any concrete class implementing this interface must give its instances
    an 'attributes' property, describing the attributes that every data
    point (observation) is expected to carry.
    """

    @property
    @abstractmethod
    def attributes(self) -> Iterable:
        """Return the set of attributes expected on each data point."""
        raise NotImplementedError
42
|
|
|
|
43
|
|
|
|
44
|
|
|
class DatapointsFactory:
    """Factory to construct Datapoints objects.

    The class keeps a registry (the 'constructors' class attribute) that maps
    a name to a callable. Each registered callable is expected to return
    (create) an object that implements the DatapointsInterface interface.

    The 'create' factory method looks up a registered callable by name and
    delegates the creation process to it, forwarding the runtime arguments.
    """
    # Registry shared by the class: maps registered name -> "constructor" callable
    constructors = {}

    @classmethod
    def register_constructor(cls, name: str):
        """Register, using a unique name, an object as a "runnable" constructor.

        A decorator method that should decorate a callable. The callable should
        return (create) an object that implements the DatapointsInterface
        interface. Registering a second callable under an already used name
        replaces the previous entry.

        Args:
            name (str): the name under which to register the "constructor"

        Returns:
            callable: a decorator that stores the decorated object in the
                registry and returns it unchanged
        """
        def wrapper(subclass):
            cls.constructors[name] = subclass
            return subclass
        return wrapper

    @classmethod
    def create(cls, name, *args, **kwargs) -> Iterable:
        """Create a Datapoints instance by using a registered "constructor".

        Args:
            name (str): the registered name of the "constructor" to use
            *args: positional arguments forwarded to the "constructor"
            **kwargs: keyword arguments forwarded to the "constructor"

        Raises:
            KeyError: happens if the input name is not found in the registry
            DatapointsCreationError: in case the object instantiation operation fails

        Returns:
            Iterable: instance implementing the DatapointsInterface
        """
        if name not in cls.constructors:
            raise KeyError(
                f"Request Engine of type '{name}'; supported are [{', '.join(sorted(cls.constructors.keys()))}]")
        constructor = cls.constructors[name]
        try:
            return constructor(*args, **kwargs)
        except Exception as exception:
            # The error message reports the constructor object itself, so it
            # must be part of the payload handed to DatapointsCreationError.
            raise DatapointsCreationError({
                'exception': exception,
                'name': name,
                'constructor': constructor,
                'args': args,
                'kwargs': kwargs,
            }) from exception
100
|
|
|
|
101
|
|
|
|
102
|
|
|
class DatapointsCreationError(Exception):
    """Raised when a registered "constructor" fails to create a Datapoints object.

    Args:
        msg (dict): details of the failed creation attempt. The keys
            'exception', 'name', 'args' (a sequence) and 'kwargs' (a mapping)
            are required. The 'constructor' key is optional; when it is absent
            the message shows '<unknown>' in its place.
    """

    def __init__(self, msg):
        # 'constructor' is read defensively: a payload without it must still
        # produce this error rather than a KeyError raised while building it.
        constructor = msg.get('constructor', '<unknown>')
        positional = ', '.join(f'{index}: {value!s}' for index, value in enumerate(msg['args']))
        keywords = ', '.join(f'{key}: {value}' for key, value in msg['kwargs'].items())
        super().__init__(
            f"Exception {msg['exception']!s}. Datapoints creation failed for constructor {msg['name']}: "
            f"{constructor}. Args: [{positional}]\nKwargs: [{keywords}]")
108
|
|
|
|
109
|
|
|
|
110
|
|
|
@attr.s
@DatapointsFactory.register_constructor('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Collection of observations that share a specific set of attributes.

    An instance stands for multiple data points (aka observations). Every
    data point is expected to carry information about the specified
    attributes, which is what makes the data "structured", in contrast to ie
    image data or sound data.

    The class is registered in the DatapointsFactory under the name
    'structured-data'.

    Args:
        observations (object): a reference to the actual datapoints object
        attributes (object): an iterable of attributes; it is stored as a list
    """
    _observations = attr.ib(init=True)
    # Whatever iterable is passed in gets materialized into a list
    _attributes = attr.ib(init=True, converter=list)

    @property
    def observations(self):
        """The referenced datapoints object."""
        return self._observations

    @observations.setter
    def observations(self, new_observations):
        self._observations = new_observations

    # TODO remove this property and "promote" the '_attributes' attribute above to 'attributes'
    @property
    def attributes(self):
        """The list of attributes built at construction time."""
        return self._attributes
139
|
|
|
|
140
|
|
|
|
141
|
|
|
class AbstractTabularData(StructuredData, TabularDataInterface, ABC):
    """Tabular data with known attributes of interest.

    Subclasses combine what StructuredData offers (attributes) with what
    TabularDataInterface offers (a data table with columns, rows, etc).
    """

    def __iter__(self):
        """Iterate over the instance by delegating to its 'iterrows' method."""
        row_iterator = self.iterrows()
        return row_iterator
149
|
|
|
|
150
|
|
|
|
151
|
|
|
@attr.s
@DatapointsFactory.register_constructor('tabular-data')
class TabularData(AbstractTabularData):
    """Table-like datapoints that are loaded in memory.

    Almost every operation is delegated to one of the collaborator objects
    received at construction time; the instance passes itself as an argument
    to each delegated call. The class is registered in the DatapointsFactory
    under the name 'tabular-data'.

    Args:
        retriever (object): answers column/row lookups, row/column counts and
            numerical-attribute queries
        iterator (object): provides column names and row/column iteration
        mutator (object): stored on the instance; not used by the code of this class
    """

    # NOTE(review): placeholder that yields None despite the Iterable annotation;
    # presumably present to satisfy TabularDataInterface -- confirm
    @property
    def columns(self) -> Iterable:
        return None

    # NOTE(review): placeholder that yields None despite the Iterable annotation;
    # presumably present to satisfy TabularDataInterface -- confirm
    @property
    def rows(self) -> Iterable:
        return None

    retriever = attr.ib(init=True)
    iterator = attr.ib(init=True)
    mutator = attr.ib(init=True)

    @property
    def attributes(self):
        """The column names, as reported by the iterator object."""
        column_names = self.iterator.columnnames(self)
        return column_names

    def column(self, identifier):
        """Ask the retriever object for the column matching the identifier."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Ask the retriever object for the row matching the identifier."""
        return self.retriever.row(identifier, self)

    def get_numerical_attributes(self):
        """Ask the retriever object for the numerical attributes."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Iterate over the attributes that are not reported as numerical.

        The result comes from a set difference, so no ordering is guaranteed.
        """
        every_attribute = set(self.attributes)
        numerical = set(self.retriever.get_numerical_attributes(self))
        return iter(every_attribute - numerical)

    @property
    def nb_columns(self):
        """Number of columns, as reported by the retriever object."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """Number of rows, as reported by the retriever object."""
        return self.retriever.nb_rows(self)

    def __len__(self):
        """Number of rows, as reported by the retriever object."""
        return self.retriever.nb_rows(self)

    def __iter__(self):
        """Iterate over the rows, as provided by the iterator object."""
        return self.iterator.iterrows(self)

    def iterrows(self):
        """Iterate over the rows, as provided by the iterator object."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Iterate over the columns, as provided by the iterator object."""
        return self.iterator.itercolumns(self)
203
|
|
|
|