Passed
Push — master ( cc7a4b...4d42d8 )
by Konstantinos
43s queued 14s
created

so_magic.data.datapoints.datapoints   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 203
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 95
dl 0
loc 203
rs 10
c 0
b 0
f 0
wmc 24

21 Methods

Rating   Name   Duplication   Size   Complexity  
A StructuredDataInterface.attributes() 0 5 1
A DatapointsInterface.observations() 0 5 1
A DatapointsFactory.register_constructor() 0 15 1
A TabularData.get_numerical_attributes() 0 2 1
A TabularData.row() 0 2 1
A TabularData.__len__() 0 2 1
A TabularData.attributes() 0 3 1
A TabularData.iterrows() 0 2 1
A TabularData.columns() 0 3 1
A TabularData.rows() 0 3 1
A TabularData.itercolumns() 0 2 1
A StructuredData.attributes() 0 3 1
A StructuredData.observations() 0 3 1
A TabularData.get_categorical_attributes() 0 2 1
A DatapointsCreationError.__init__() 0 3 1
A AbstractTabularData.__iter__() 0 2 1
A TabularData.__iter__() 0 2 1
A TabularData.nb_rows() 0 3 1
A TabularData.column() 0 2 1
A TabularData.nb_columns() 0 3 1
A DatapointsFactory.create() 0 28 3
1
from abc import ABC, abstractmethod
2
from typing import Iterable
3
import attr
4
5
from .tabular_data_interface import TabularDataInterface
6
7
8
class DatapointsInterface(ABC):
    """Contract for objects that wrap a collection of data points.

    Any class implementing this interface must expose an 'observations'
    property; that property is the single access point to the underlying
    data points held by an instance.
    """

    @property
    @abstractmethod
    def observations(self) -> Iterable:
        """Give access to the wrapped collection of data points."""
        raise NotImplementedError()
24
25
26
class StructuredDataInterface(ABC):
    """Contract for data points that share a known set of attributes.

    Any class implementing this interface must expose an 'attributes'
    property describing the attributes that every data point (observation)
    is expected to carry.
    """

    @property
    @abstractmethod
    def attributes(self) -> Iterable:
        """Give access to the set of attributes shared by the data points."""
        raise NotImplementedError()
42
43
44
class DatapointsFactory:
    """Factory to construct Datapoints objects.

    A class that registers objects (constructors), which can be "called" to return (create) an
    object that implements the DatapointsInterface interface.

    Also, exposes the 'create' factory method that given runtime arguments,
    returns an object that implements the DatapointsInterface interface by
    delegating the creation process to one of the registered constructors.
    """
    # Registry mapping a registered name to its "constructor" callable. It is
    # a class attribute that is only ever mutated (never rebound), so every
    # registration made through this class ends up in the same dict.
    constructors = {}

    @classmethod
    def register_constructor(cls, name: str):
        """Register, using a unique name, an object as a "runnable" constructor.

        A decorator method that should decorate a callable. The callable should
        return (create) an object that implements the DatapointsInterface
        interface.

        Registering a second callable under an already used name replaces the
        previous entry.

        Args:
            name (str): the name under which to register the "constructor"

        Returns:
            callable: a decorator that stores its argument in the registry and
            returns it unchanged
        """
        def wrapper(subclass):
            cls.constructors[name] = subclass
            return subclass
        return wrapper

    @classmethod
    def create(cls, name, *args, **kwargs) -> Iterable:
        """Create a Datapoints instance by using a registered "constructor".

        Args:
            name (str): the registered name of the "constructor" to use
            *args: positional arguments forwarded to the "constructor"
            **kwargs: keyword arguments forwarded to the "constructor"

        Raises:
            KeyError: happens if the input name is not found in the registry
            DatapointsCreationError: in case the object instantiation operation fails

        Returns:
            Iterable: instance implementing the DatapointsInterface
        """
        if name not in cls.constructors:
            raise KeyError(
                f"Request Engine of type '{name}'; supported are [{', '.join(sorted(cls.constructors))}]")
        constructor = cls.constructors[name]
        try:
            return constructor(*args, **kwargs)
        except Exception as exception:
            # The error formats its message from this payload, including the
            # 'constructor' entry; leaving it out would make the error itself
            # fail with a KeyError and hide the real cause.
            raise DatapointsCreationError({
                'exception': exception,
                'name': name,
                'constructor': constructor,
                'args': args,
                'kwargs': kwargs,
            }) from exception
100
101
102
class DatapointsCreationError(Exception):
    """Signal that a registered "constructor" failed to create Datapoints.

    The exception message is assembled from a dict describing the failed
    creation attempt.

    Args:
        msg (dict): context of the failure. The keys 'exception', 'name',
            'args' (an iterable) and 'kwargs' (a mapping) are required. The
            key 'constructor' is optional; a placeholder is printed when it
            is absent.
    """
    def __init__(self, msg):
        # 'constructor' is read leniently: a payload without it must still
        # produce a readable error instead of raising KeyError from here.
        constructor = msg.get('constructor', 'n/a')
        positional = ', '.join(f'{index}: {value!s}' for index, value in enumerate(msg['args']))
        keyword = ', '.join(f'{key}: {value}' for key, value in msg['kwargs'].items())
        super().__init__(
            f"Exception {msg['exception']!s}. Datapoints creation failed for constructor {msg['name']}: "
            f"{constructor}. Args: [{positional}]\nKwargs: [{keyword}]")
108
109
110
@attr.s
@DatapointsFactory.register_constructor('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Collection of observations that all share a known set of attributes.

    An instance holds multiple data points (observations). Every data point
    is expected to carry information about the same attributes, which is what
    makes the data "structured", as opposed to eg image or sound data.

    The class is registered in the DatapointsFactory under the name
    'structured-data'.

    Args:
        observations (object): a reference to the actual datapoints object
        attributes (object): a reference to the attributes object; it is
            converted to a list on assignment
    """
    _observations = attr.ib(init=True)
    _attributes = attr.ib(init=True, converter=list)

    @property
    def observations(self):
        """The wrapped datapoints object."""
        return self._observations

    @observations.setter
    def observations(self, observations):
        """Swap the wrapped datapoints object for another one."""
        self._observations = observations

    # TODO remove property and "promote" above attribute '_attributes' to 'attributes'
    @property
    def attributes(self):
        """The attributes, stored as a list."""
        return self._attributes
139
140
141
class AbstractTabularData(StructuredData, TabularDataInterface, ABC):
    """Structured data that can also be treated as a table.

    Subclasses combine the attribute-related capabilities of structured data
    with the capabilities of a data table (columns, rows, etc).
    """
    def __iter__(self):
        """Iterate over an instance by delegating to its 'iterrows' method."""
        # 'iterrows' is not defined in this class; it has to be supplied by a
        # base class or by the concrete subclass.
        row_iterator = self.iterrows()
        return row_iterator
149
150
151
@attr.s
@DatapointsFactory.register_constructor('tabular-data')
class TabularData(AbstractTabularData):
    """Table-like datapoints that are loaded in memory.

    Every operation is delegated to one of the collaborator objects supplied
    at construction time, and each delegated call receives this instance as
    an argument. The class is registered in the DatapointsFactory under the
    name 'tabular-data'.
    """

    # Collaborators. 'mutator' is stored but not used by any method below.
    retriever = attr.ib(init=True)
    iterator = attr.ib(init=True)
    mutator = attr.ib(init=True)

    @property
    def columns(self) -> Iterable:
        # NOTE(review): placeholder; evaluates to None despite the annotation.
        return None

    @property
    def rows(self) -> Iterable:
        # NOTE(review): placeholder; evaluates to None despite the annotation.
        return None

    @property
    def attributes(self):
        """The column names, as reported by the iterator collaborator."""
        return self.iterator.columnnames(self)

    @property
    def nb_columns(self):
        """Number of columns, as reported by the retriever collaborator."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """Number of rows, as reported by the retriever collaborator."""
        return self.retriever.nb_rows(self)

    def __len__(self):
        """Length of the table, taken from the retriever's row count."""
        return self.retriever.nb_rows(self)

    def __iter__(self):
        """Iterate over the rows supplied by the iterator collaborator."""
        return self.iterator.iterrows(self)

    def column(self, identifier):
        """Fetch the column matching the identifier through the retriever."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Fetch the row matching the identifier through the retriever."""
        return self.retriever.row(identifier, self)

    def iterrows(self):
        """Return whatever the iterator collaborator produces for rows."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Return whatever the iterator collaborator produces for columns."""
        return self.iterator.itercolumns(self)

    def get_numerical_attributes(self):
        """Ask the retriever which attributes are numerical."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Iterate over the attributes that the retriever does not report as numerical.

        The result comes from a set difference, so no particular order is
        guaranteed.
        """
        every_attribute = set(self.attributes)
        numerical = set(self.retriever.get_numerical_attributes(self))
        return iter(every_attribute - numerical)
203