Passed
Pull Request — master (#22)
by Konstantinos
02:16
created

so_magic.data.datapoints.datapoints   A

Complexity

Total Complexity 23

Size/Duplication

Total Lines 195
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 88
dl 0
loc 195
rs 10
c 0
b 0
f 0
wmc 23

20 Methods

Rating   Name   Duplication   Size   Complexity  
A TabularData.get_numerical_attributes() 0 2 1
A TabularData.row() 0 2 1
A TabularData.__len__() 0 2 1
A TabularData.attributes() 0 3 1
A TabularData.iterrows() 0 2 1
A TabularData.columns() 0 3 1
A TabularData.rows() 0 3 1
A TabularData.itercolumns() 0 2 1
A StructuredData.attributes() 0 3 1
A StructuredData.observations() 0 3 1
A StructuredDataInterface.attributes() 0 5 1
A TabularData.get_categorical_attributes() 0 2 1
A AbstractTabularData.__iter__() 0 2 1
A DatapointsInterface.observations() 0 5 1
A TabularData.__iter__() 0 2 1
A TabularData.nb_rows() 0 3 1
A TabularData.column() 0 2 1
A TabularData.nb_columns() 0 3 1
A DatapointsFactory.create() 0 24 3
A DatapointsFactory.register_constructor() 0 15 1
1
from abc import ABC, abstractmethod
2
from typing import Iterable
3
import attr
4
5
from .tabular_data_interface import TabularDataInterface
6
7
8
class DatapointsInterface(ABC):
    """Represent multiple data points out of a collection of data.

    Classes implementing this interface provide their object instances (eg
    objects created using the class's constructor method) with the
    'observations' property.

    The 'observations' property should hold the information about the
    datapoints.
    """

    @property
    @abstractmethod
    def observations(self) -> Iterable:
        """The collection of datapoints is referenced through this property."""
        # Abstract: concrete subclasses must override this property.
        raise NotImplementedError
24
25
26
class StructuredDataInterface(ABC):
    """Data points that are expected to have a specific set of attributes.

    Classes implementing this interface provide their object instances (eg
    objects created using the class's constructor method) with the
    'attributes' property.

    The 'attributes' property should hold the information about the
    attributes that each data point (observation) is expected to have.
    """

    @property
    @abstractmethod
    def attributes(self) -> Iterable:
        """The set of attributes is referenced through this property."""
        # Abstract: concrete subclasses must override this property.
        raise NotImplementedError
42
43
44
class DatapointsFactory:
    """Factory to construct Datapoints objects.

    A class that registers objects (constructors), which can be "called" to
    return (create) an object that implements the DatapointsInterface
    interface.

    Also, exposes the 'create' factory method that, given runtime arguments,
    returns an object that implements the DatapointsInterface interface by
    delegating the creation process to one of the registered constructors.
    """
    # Registry mapping a registration name to the callable registered under it.
    constructors = {}

    @classmethod
    def register_constructor(cls, name: str):
        """Register, using a unique name, an object as a "runnable" constructor.

        A decorator method that should decorate a callable. The callable
        should return (create) an object that implements the
        DatapointsInterface interface.

        Args:
            name (str): the name under which to register the "constructor"

        Returns:
            callable: a decorator that stores the decorated object in the
                registry under 'name' and returns that object unchanged
        """
        def wrapper(constructor):
            # Registering an existing name silently overwrites the previous entry.
            cls.constructors[name] = constructor
            return constructor
        return wrapper

    @classmethod
    def create(cls, name, *args, **kwargs) -> Iterable:
        """Create a Datapoints instance by using a registered "constructor".

        Args:
            name (str): the registered name of the "constructor" to use
            *args: positional arguments forwarded to the "constructor"
            **kwargs: keyword arguments forwarded to the "constructor"

        Raises:
            KeyError: happens if the input name is not found in the registry
            DatapointsCreationError: in case the object instantiation operation
                fails; the original exception is attached as its __cause__

        Returns:
            Iterable: instance implementing the DatapointsInterface
        """
        if name not in cls.constructors:
            supported = ', '.join(sorted(cls.constructors.keys()))
            # NOTE(review): the "Request Engine" wording is kept as-is for
            # backward compatibility, although this registry holds constructors.
            raise KeyError(f"Request Engine of type '{name}'; supported are [{supported}]")

        constructor = cls.constructors[name]
        try:
            return constructor(*args, **kwargs)
        except Exception as error:
            positional = ', '.join(f'{index}: {value!s}' for index, value in enumerate(args))
            keyword = ', '.join(f'{key}: {value}' for key, value in kwargs.items())
            # Chain the original exception instead of printing it, so callers
            # can inspect the root cause through __cause__ / the traceback.
            raise DatapointsCreationError(
                f"Exception {error!s}. Datapoints creation failed for constructor {name}: {constructor}. "
                f"Args: [{positional}]\nKwargs: [{keyword}]"
            ) from error


class DatapointsCreationError(Exception):
    """Raised by DatapointsFactory.create when a registered constructor fails."""
99
100
101
102
@attr.s
@DatapointsFactory.register_constructor('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Structured data. There are specific attributes/variables per observation.

    Instances of this class represent collections of data (multiple data
    points aka observations). Each data point is expected to hold information
    about the specified attributes and that is why we are dealing with
    structured data/information in contrast to ie image data or sound data.

    The class is registered in the DatapointsFactory under the
    'structured-data' name.

    Args:
        observations (object): a reference to the actual datapoints object
        attributes (object): a reference to the attributes object; it is
            iterated once and stored as a list
    """
    _observations = attr.ib(init=True)
    # The converter materializes whatever iterable is passed in into a new list.
    _attributes = attr.ib(init=True, converter=list)

    # TODO remove property and "promote" above attribute '_attributes' to 'attributes'
    @property
    def attributes(self):
        """The list of attributes built from the constructor argument."""
        return self._attributes

    @property
    def observations(self):
        """The datapoints object currently referenced by this instance."""
        return self._observations

    @observations.setter
    def observations(self, observations):
        # Rebinds the reference; the previous datapoints object is not modified.
        self._observations = observations
131
132
133
class AbstractTabularData(StructuredData, TabularDataInterface, ABC):
    """Tabular Data with known attributes of interest.

    Classes inheriting from this abstract class gain both the capabilities of
    structured data in terms of their attributes and the capabilities of a data
    table in terms of columns, rows, etc.
    """

    def __iter__(self):
        """Iterate over the object by delegating to its 'iterrows' method."""
        return self.iterrows()
141
142
143
@attr.s
@DatapointsFactory.register_constructor('tabular-data')
class TabularData(AbstractTabularData):
    """Table-like datapoints that are loaded in memory.

    Every operation is delegated to one of the injected collaborator objects,
    which receive this instance as an argument. The class is registered in the
    DatapointsFactory under the 'tabular-data' name.
    """

    # Collaborators supplied at construction time (declaration order defines
    # the order of the generated constructor arguments).
    retriever = attr.ib(init=True)  # answers column/row lookups and size queries
    iterator = attr.ib(init=True)  # provides row/column iteration and column names
    mutator = attr.ib(init=True)  # not used by any method of this class

    @property
    def columns(self) -> Iterable:
        """Placeholder; currently evaluates to None."""
        # NOTE(review): returns None despite the Iterable annotation -- confirm
        # the intended implementation (eg delegating to 'itercolumns').
        pass

    @property
    def rows(self) -> Iterable:
        """Placeholder; currently evaluates to None."""
        # NOTE(review): returns None despite the Iterable annotation -- confirm
        # the intended implementation (eg delegating to 'iterrows').
        pass

    @property
    def attributes(self):
        """The column names, as reported by the iterator collaborator."""
        return self.iterator.columnnames(self)

    def column(self, identifier):
        """Return the column that the retriever resolves for 'identifier'."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Return the row that the retriever resolves for 'identifier'."""
        return self.retriever.row(identifier, self)

    def get_numerical_attributes(self):
        """Return the numerical attributes, as reported by the retriever."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Iterate over the attributes not reported as numerical.

        The result is computed as a set difference, so the iteration order is
        unspecified and duplicates are collapsed.
        """
        numerical = set(self.retriever.get_numerical_attributes(self))
        return iter(set(self.attributes) - numerical)

    @property
    def nb_columns(self):
        """The number of columns, as reported by the retriever."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """The number of rows, as reported by the retriever."""
        return self.retriever.nb_rows(self)

    def __len__(self):
        """Return the number of rows, as reported by the retriever."""
        return self.retriever.nb_rows(self)

    def __iter__(self):
        """Iterate over the rows through the iterator collaborator."""
        return self.iterator.iterrows(self)

    def iterrows(self):
        """Return the row iteration produced by the iterator collaborator."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Return the column iteration produced by the iterator collaborator."""
        return self.iterator.itercolumns(self)
195