StructuredData.attributes()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
from abc import ABC, abstractmethod
2
from typing import Iterable
3
import attr
4
5
from .tabular_data_interface import TabularDataInterface
6
7
8
class DatapointsInterface(ABC):
    """Interface for objects that expose a collection of data points.

    Every concrete class implementing this interface must equip its
    instances with an 'observations' property.

    That property is the handle through which the underlying data points
    are reached.
    """

    @property
    @abstractmethod
    def observations(self) -> Iterable:
        """Return the collection of data points held by this object.

        Raises:
            NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError
24
25
26
class StructuredDataInterface(ABC):
    """Interface for data points that share a specific set of attributes.

    Every concrete class implementing this interface must equip its
    instances with an 'attributes' property.

    That property describes the attributes which each data point
    (observation) is expected to have.
    """

    @property
    @abstractmethod
    def attributes(self) -> Iterable:
        """Return the set of attributes expected on every data point.

        Raises:
            NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError
42
43
44
class DatapointsFactory:
    """Factory to construct Datapoints objects.

    A class that registers objects (constructors), which can be "called" to return (create) an
    object that implements the DatapointsInterface interface.

    Also, exposes the 'create' factory method that given runtime arguments,
    returns an object that implements the DatapointsInterface interface by
    delegating the creation process to one of the registered constructors.
    """
    # Registry shared by the whole class: maps a registered name to its constructor
    constructors = {}

    @classmethod
    def register_constructor(cls, name: str):
        """Register, using a unique name, an object as a "runnable" constructor.

        A decorator method that should decorate a callable. The callable should
        return (create) an object that implements the DatapointsInterface
        interface. Registering a second callable under an existing name
        replaces the previous entry.

        Args:
            name (str): the name under which to register the "constructor"

        Returns:
            callable: decorator that stores its argument in the registry and
                returns it unchanged
        """
        def wrapper(subclass):
            cls.constructors[name] = subclass
            return subclass
        return wrapper

    @classmethod
    def create(cls, name: str, *args, **kwargs) -> Iterable:
        """Create a Datapoints instance by using a registered "constructor".

        Args:
            name (str): the registered name of the "constructor" to use
            *args: positional arguments forwarded to the "constructor"
            **kwargs: keyword arguments forwarded to the "constructor"

        Raises:
            KeyError: happens if the input name is not found in the registry
            DatapointsCreationError: in case the object instantiation operation fails

        Returns:
            Iterable: instance implementing the DatapointsInterface
        """
        if name not in cls.constructors:
            raise KeyError(
                f"Request Engine of type '{name}'; supported are [{', '.join(sorted(cls.constructors.keys()))}]")
        constructor = cls.constructors[name]
        try:
            return constructor(*args, **kwargs)
        except Exception as exception:
            # The 'constructor' entry is read by DatapointsCreationError when it
            # builds its message; leaving it out turned every creation failure
            # into a bare KeyError('constructor').
            raise DatapointsCreationError({
                'exception': exception,
                'name': name,
                'constructor': constructor,
                'args': args,
                'kwargs': kwargs,
            }) from exception


class DatapointsCreationError(Exception):
    """Raised when a registered constructor fails to build a Datapoints object.

    Args:
        msg (dict): details of the failed creation attempt. The keys
            'exception', 'name', 'args' (a sequence) and 'kwargs' (a mapping)
            are required. The 'constructor' key is optional and is rendered
            as None when absent.
    """
    def __init__(self, msg):
        positional = ', '.join(f'{i}: {str(_)}' for i, _ in enumerate(msg['args']))
        keyword = ', '.join(f'{k}: {v}' for k, v in msg['kwargs'].items())
        super().__init__(
            f"Exception {str(msg['exception'])}. Datapoints creation failed for constructor {msg['name']}: "
            f"{msg.get('constructor')}. Args: [{positional}]\nKwargs: [{keyword}]")
108
109
110
@attr.s
@DatapointsFactory.register_constructor('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Collection of observations that all share a known set of attributes.

    An instance stands for multiple data points (aka observations). Each
    data point is expected to carry information about the specified
    attributes, which is what makes the data "structured", in contrast to
    ie image data or sound data.

    The class is registered in the DatapointsFactory under the name
    'structured-data'.

    Args:
        observations (object): a reference to the actual datapoints object
        attributes (object): a reference to the attributes object; it is
            converted to a list when the instance is built
    """
    _observations = attr.ib(init=True)
    # The given iterable of attributes is materialized into a list
    _attributes = attr.ib(init=True, converter=list)

    @property
    def observations(self):
        """The stored reference to the datapoints object."""
        return self._observations

    @observations.setter
    def observations(self, observations):
        """Replace the stored reference to the datapoints object."""
        self._observations = observations

    # TODO remove this property and promote the '_attributes' attribute above to 'attributes'
    @property
    def attributes(self):
        """The list of attributes built from the constructor argument."""
        return self._attributes
139
140
141
class AbstractTabularData(StructuredData, TabularDataInterface, ABC):
    """Abstract base for tabular data with known attributes of interest.

    Subclasses combine the structured-data behaviour (attributes and
    observations) with the data-table behaviour (columns, rows, etc)
    declared by TabularDataInterface.
    """

    def __iter__(self):
        """Iterate over the table by delegating to the 'iterrows' method."""
        row_iterator = self.iterrows()
        return row_iterator
149
150
151
@attr.s
@DatapointsFactory.register_constructor('tabular-data')
class TabularData(AbstractTabularData):
    """Table-like datapoints that are loaded in memory.

    Table operations are delegated to three collaborator objects, each of
    which receives this instance as an argument:

    - retriever: column/row lookup, table dimensions and numerical attributes
    - iterator: column names and iteration over rows/columns
    - mutator: adding columns

    The class is registered in the DatapointsFactory under the name
    'tabular-data'.
    """
    retriever = attr.ib(init=True)
    iterator = attr.ib(init=True)
    mutator = attr.ib(init=True)

    @property
    def columns(self) -> Iterable:
        # NOTE(review): not implemented; evaluates to None despite the annotation
        pass

    @property
    def rows(self) -> Iterable:
        # NOTE(review): not implemented; evaluates to None despite the annotation
        pass

    @property
    def attributes(self):
        """The column names, as reported by the iterator object."""
        return self.iterator.columnnames(self)

    def column(self, identifier):
        """Return the column matching the identifier, as found by the retriever."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Return the row matching the identifier, as found by the retriever."""
        return self.retriever.row(identifier, self)

    def get_numerical_attributes(self):
        """Return the numerical attributes, as reported by the retriever."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Return an iterator over the attributes that are not numerical.

        The attributes are yielded in the order in which 'attributes' reports
        them, with duplicates removed, so that the result is the same on
        every run (iterating a set gives no ordering guarantee).
        """
        attributes = list(self.attributes)
        numerical = set(self.retriever.get_numerical_attributes(self))
        # dict.fromkeys drops duplicates while keeping first-seen order
        return iter(dict.fromkeys(
            attribute for attribute in attributes if attribute not in numerical))

    @property
    def nb_columns(self):
        """The number of columns, as reported by the retriever."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """The number of rows, as reported by the retriever."""
        return self.retriever.nb_rows(self)

    def __len__(self):
        """The length of the table is its number of rows."""
        return self.retriever.nb_rows(self)

    def __iter__(self):
        """Iterating over the table is the same as iterating over its rows."""
        return self.iterator.iterrows(self)

    def iterrows(self):
        """Iterate over the rows, as produced by the iterator object."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Iterate over the columns, as produced by the iterator object."""
        return self.iterator.itercolumns(self)

    def add_column(self, values, column_name, **kwargs):
        """Add a column through the mutator object.

        Args:
            values: the values of the new column
            column_name: the name of the new column
            **kwargs: extra options forwarded as-is to the mutator
        """
        self.mutator.add_column(self, values, column_name, **kwargs)
205