|
1
|
|
|
from abc import ABC, abstractmethod |
|
2
|
|
|
from typing import Iterable |
|
3
|
|
|
import attr |
|
4
|
|
|
|
|
5
|
|
|
from .tabular_data_interface import TabularDataInterface |
|
6
|
|
|
|
|
7
|
|
|
|
|
8
|
|
|
class DatapointsInterface(ABC):
    """Interface for objects that stand for a collection of data points.

    Any class implementing this interface must supply the 'observations'
    property; it is the single access point to the data points an instance
    holds.
    """

    @property
    @abstractmethod
    def observations(self) -> Iterable:
        """Return the collection of data points held by the instance."""
        raise NotImplementedError
|
24
|
|
|
|
|
25
|
|
|
|
|
26
|
|
|
class StructuredDataInterface(ABC):
    """Interface for data whose points share a known set of attributes.

    Any class implementing this interface must supply the 'attributes'
    property; it describes the attributes that every data point
    (observation) is expected to carry.
    """

    @property
    @abstractmethod
    def attributes(self) -> Iterable:
        """Return the set of attributes each data point is expected to have."""
        raise NotImplementedError
|
42
|
|
|
|
|
43
|
|
|
|
|
44
|
|
|
class DatapointsFactory:
    """Registry-backed factory for Datapoints objects.

    Callables ("constructors") are stored in the class-level 'constructors'
    mapping under a unique name. The 'create' factory method looks a
    constructor up by name and delegates the actual object creation to it,
    forwarding any runtime arguments.
    """

    # Shared registry: constructor name -> callable building the object.
    constructors = {}

    @classmethod
    def register_constructor(cls, name: str):
        """Return a decorator that registers a callable under the given name.

        The decorated callable is stored in the registry and handed back
        unchanged, so decorating does not alter it. It is expected to return
        an object implementing the DatapointsInterface interface.

        Args:
            name (str): the name under which to register the "constructor"
        """
        def _register(constructor):
            cls.constructors[name] = constructor
            return constructor

        return _register

    @classmethod
    def create(cls, name, *args, **kwargs) -> Iterable:
        """Build a Datapoints instance through a registered "constructor".

        Args:
            name (str): the registered name of the "constructor" to use
            *args: positional arguments forwarded to the constructor
            **kwargs: keyword arguments forwarded to the constructor

        Raises:
            KeyError: if the name is not found in the registry
            DatapointsCreationError: if the constructor raises when called

        Returns:
            Iterable: whatever the selected constructor returns
        """
        registry = cls.constructors
        if name not in registry:
            supported = ', '.join(sorted(registry))
            raise KeyError(
                f"Request Engine of type '{name}'; supported are [{supported}]")
        try:
            return registry[name](*args, **kwargs)
        except Exception as exception:
            # NOTE(review): this payload has no 'constructor' entry; confirm
            # DatapointsCreationError does not require one.
            failure_details = {
                'exception': exception,
                'name': name,
                'args': args,
                'kwargs': kwargs,
            }
            raise DatapointsCreationError(failure_details) from exception
|
100
|
|
|
|
|
101
|
|
|
|
|
102
|
|
|
class DatapointsCreationError(Exception):
    """Raised when a registered constructor fails to build a Datapoints object.

    Args:
        msg (dict): details of the failure. Keys read:
            'exception' (required): the exception raised by the constructor
            'name' (required): the registered name of the constructor
            'constructor' (optional): the constructor object itself
            'args' (optional): positional arguments passed to the constructor
            'kwargs' (optional): keyword arguments passed to the constructor
    """

    def __init__(self, msg):
        # 'constructor' (like 'args'/'kwargs') is read defensively: a payload
        # lacking it must not turn the report of a creation failure into an
        # unrelated KeyError that hides the real cause.
        constructor = msg.get('constructor', '<unknown>')
        positional = ', '.join(
            f'{index}: {str(value)}' for index, value in enumerate(msg.get('args', ())))
        keyword = ', '.join(
            f'{key}: {value}' for key, value in msg.get('kwargs', {}).items())
        super().__init__(
            f"Exception {str(msg['exception'])}. Datapoints creation failed for constructor {msg['name']}: "
            f"{constructor}. Args: [{positional}]\nKwargs: "
            f"[{keyword}]")
|
108
|
|
|
|
|
109
|
|
|
|
|
110
|
|
|
@attr.s
@DatapointsFactory.register_constructor('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Collection of observations that share a fixed set of attributes.

    An instance stands for multiple data points (observations). Every data
    point is expected to carry information about the specified attributes,
    which is what makes the data "structured", as opposed to e.g. image or
    sound data.

    The class is registered in DatapointsFactory under the name
    'structured-data'.

    Args:
        observations (object): a reference to the actual datapoints object
        attributes (object): a reference to the attributes object; it is
            converted to a list when the instance is created
    """

    _observations = attr.ib(init=True)
    _attributes = attr.ib(init=True, converter=list)

    @property
    def observations(self):
        """The stored reference to the datapoints object."""
        return self._observations

    @observations.setter
    def observations(self, observations):
        self._observations = observations

    # TODO drop this property and promote the '_attributes' attribute above
    # to a public 'attributes' attribute
    @property
    def attributes(self):
        """The stored list of attributes."""
        return self._attributes
|
139
|
|
|
|
|
140
|
|
|
|
|
141
|
|
|
class AbstractTabularData(StructuredData, TabularDataInterface, ABC):
    """Abstract base for tabular data with known attributes of interest.

    Subclasses combine the structured-data capabilities (attributes) with
    the capabilities of a data table (columns, rows, etc.).
    """

    def __iter__(self):
        """Iterate over the instance by delegating to its 'iterrows' method."""
        row_iterator = self.iterrows()
        return row_iterator
|
149
|
|
|
|
|
150
|
|
|
|
|
151
|
|
|
@attr.s
@DatapointsFactory.register_constructor('tabular-data')
class TabularData(AbstractTabularData):
    """Table-like datapoints that are loaded in memory.

    Data access is delegated to collaborator objects supplied at
    construction time; every delegated call receives this instance as an
    argument. The class is registered in DatapointsFactory under the name
    'tabular-data'.
    """

    # Collaborators: 'retriever' answers lookups and size queries, 'iterator'
    # answers iteration and column-name queries. 'mutator' is stored but not
    # used by any method of this class.
    retriever = attr.ib(init=True)
    iterator = attr.ib(init=True)
    mutator = attr.ib(init=True)

    @property
    def columns(self) -> Iterable:
        # Placeholder: evaluates to None.
        # NOTE(review): presumably only here to satisfy TabularDataInterface;
        # confirm whether a real implementation is expected.
        return None

    @property
    def rows(self) -> Iterable:
        # Placeholder: evaluates to None (see note on 'columns').
        return None

    @property
    def attributes(self):
        """Column names, as reported by the iterator collaborator."""
        return self.iterator.columnnames(self)

    @property
    def nb_columns(self):
        """Number of columns, as reported by the retriever collaborator."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """Number of rows, as reported by the retriever collaborator."""
        return self.retriever.nb_rows(self)

    def column(self, identifier):
        """Fetch the column matching the identifier through the retriever."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Fetch the row matching the identifier through the retriever."""
        return self.retriever.row(identifier, self)

    def get_numerical_attributes(self):
        """Return the numerical attributes, as reported by the retriever."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Return an iterator over the attributes that are not numerical."""
        all_attributes = set(self.attributes)
        categorical = all_attributes.difference(
            self.retriever.get_numerical_attributes(self))
        return iter(categorical)

    def __len__(self):
        return self.retriever.nb_rows(self)

    def __iter__(self):
        return self.iterator.iterrows(self)

    def iterrows(self):
        """Iterate over the rows through the iterator collaborator."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Iterate over the columns through the iterator collaborator."""
        return self.iterator.itercolumns(self)
|
203
|
|
|
|