Passed
Push — mpeta ( 4519ef...1841cb )
by Konstantinos
01:23
created

DatapointsCreationError.__init__()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
from abc import ABC, abstractmethod
2
from typing import Iterable
3
import attr
4
from so_magic.utils import SubclassRegistry
5
from .tabular_data_interface import TabularDataInterface
6
7
8
class DatapointsInterface(ABC):
    """Represent multiple data points out of a collection of data.

    Classes implementing this interface provide their instances (eg objects
    created through the class constructor) with the 'observations' property.

    The 'observations' property should hold the information about the
    datapoints.
    """

    @property
    @abstractmethod
    def observations(self) -> Iterable:
        """The collection of datapoints is referenced through this property.

        Raises:
            NotImplementedError: when the base implementation is invoked
                directly; concrete classes must override this property
        """
        raise NotImplementedError
24
25
26
class StructuredDataInterface(ABC):
    """Data points that are expected to have a specific set of attributes.

    Classes implementing this interface provide their instances (eg objects
    created through the class constructor) with the 'attributes' property.

    The 'attributes' property should hold the information about the
    attributes that each data point (observation) is expected to have.
    """

    @property
    @abstractmethod
    def attributes(self) -> Iterable:
        """The set of attributes is referenced through this property.

        Raises:
            NotImplementedError: when the base implementation is invoked
                directly; concrete classes must override this property
        """
        raise NotImplementedError
42
43
44
class DatapointsFactory(metaclass=SubclassRegistry):
    """Factory to construct Datapoints objects.

    A class that registers objects (constructors), which can be "called" to
    return (create) an object that implements the DatapointsInterface
    interface.

    Also, exposes the 'create' factory method that, given runtime arguments,
    returns an object that implements the DatapointsInterface interface by
    delegating the creation process to one of the registered constructors.

    NOTE(review): the 'subclasses' registry mapping used below is presumably
    supplied by the SubclassRegistry metaclass -- confirm in so_magic.utils.
    """

    @classmethod
    def create(cls, name, *args, **kwargs) -> Iterable:
        """Create a Datapoints instance by using a registered "constructor".

        Args:
            name (str): the registered name of the "constructor" to use
            *args: positional arguments forwarded to the constructor
            **kwargs: keyword arguments forwarded to the constructor

        Raises:
            ValueError: re-raised unchanged when the constructor raises it
            DatapointsCreationError: for any other failure; this includes an
                unknown name, because the registry lookup happens inside the
                guarded block and its KeyError gets wrapped

        Returns:
            Iterable: instance implementing the DatapointsInterface
        """
        try:
            return cls.subclasses[name](*args, **kwargs)
        except ValueError:
            # A bare raise propagates the active ValueError together with its
            # original traceback.
            raise
        except Exception as exception:
            # Render the call arguments once, then embed them in the message.
            args_report = ', '.join(f'{i}: {str(arg)}' for i, arg in enumerate(args))
            kwargs_report = ', '.join(f'{k}: {v}' for k, v in kwargs.items())
            raise DatapointsCreationError(
                f"Exception type {type(exception)}. Datapoints creation failed for constructor {name}: "
                f"{cls.subclasses.get(name)}. Args: [{args_report}]\nKwargs: "
                f"[{kwargs_report}]") from exception
76
77
78
class DatapointsCreationError(Exception):
    """Signal that creating a Datapoints object through a constructor failed."""
79
80
81
@attr.s
@DatapointsFactory.register_as_subclass('structured-data')
class StructuredData(DatapointsInterface, StructuredDataInterface):
    """Structured data. There are specific attributes/variables per observation.

    Instances of this class represent collections of data (multiple data
    points aka observations). Each data point is expected to hold information
    about the specified attributes and that is why we are dealing with
    structured data/information in contrast to ie image data or sound data.

    Args:
        observations (object): a reference to the actual datapoints object
        attributes (object): a reference to the attributes object; it is
            converted to a list when the instance is initialized
    """
    _observations = attr.ib(init=True)
    # 'list' is used directly as the converter; no wrapping lambda is needed.
    _attributes = attr.ib(init=True, converter=list)

    # TODO remove property and "promote" above attribute '_attributes' to 'attributes'
    @property
    def attributes(self):
        """The attributes each observation is expected to have."""
        return self._attributes

    @property
    def observations(self):
        """The reference to the underlying datapoints object."""
        return self._observations

    @observations.setter
    def observations(self, observations):
        self._observations = observations
110
111
112
class AbstractTabularData(StructuredData, TabularDataInterface, ABC):
    """Tabular Data with known attributes of interest.

    Classes inheriting from this abstract class gain both the capabilities of
    structured data in terms of their attributes and the capabilities of a
    data table in terms of columns, rows, etc.
    """

    def __iter__(self):
        """Iterate over the table by delegating to 'iterrows'."""
        return self.iterrows()
120
121
122
@attr.s
@DatapointsFactory.register_as_subclass('tabular-data')
class TabularData(AbstractTabularData):
    """Table-like datapoints that are loaded in memory.

    Every table operation is delegated to one of three collaborator objects,
    each of which receives this instance as an argument.

    Args:
        retriever (object): answers read queries (column, row, numerical
            attributes, number of columns/rows)
        iterator (object): provides row/column iteration and column names
        mutator (object): performs modifications such as adding a column
    """
    retriever = attr.ib(init=True)
    iterator = attr.ib(init=True)
    mutator = attr.ib(init=True)

    @property
    def columns(self) -> Iterable:
        # NOTE(review): placeholder without an implementation; it evaluates to
        # None despite the Iterable annotation -- confirm the intended result.
        return None

    @property
    def rows(self) -> Iterable:
        # NOTE(review): placeholder without an implementation; it evaluates to
        # None despite the Iterable annotation -- confirm the intended result.
        return None

    @property
    def attributes(self):
        """The column names, as reported by the iterator collaborator."""
        return self.iterator.columnnames(self)

    def column(self, identifier):
        """Return the column matching the identifier, via the retriever."""
        return self.retriever.column(identifier, self)

    def row(self, identifier):
        """Return the row matching the identifier, via the retriever."""
        return self.retriever.row(identifier, self)

    def get_numerical_attributes(self):
        """Return the numerical attributes, as reported by the retriever."""
        return self.retriever.get_numerical_attributes(self)

    def get_categorical_attributes(self):
        """Return an iterator over the attributes that are not numerical.

        The result comes from a set difference, so the order is arbitrary.
        """
        every_attribute = set(self.attributes)
        numerical = set(self.retriever.get_numerical_attributes(self))
        return iter(every_attribute - numerical)

    @property
    def nb_columns(self):
        """The number of columns, as reported by the retriever."""
        return self.retriever.nb_columns(self)

    @property
    def nb_rows(self):
        """The number of rows, as reported by the retriever."""
        return self.retriever.nb_rows(self)

    def __len__(self):
        """The length of the table is its number of rows."""
        return self.retriever.nb_rows(self)

    def __iter__(self):
        """Iterate over the rows through the iterator collaborator."""
        return self.iterator.iterrows(self)

    def iterrows(self):
        """Iterate over the rows through the iterator collaborator."""
        return self.iterator.iterrows(self)

    def itercolumns(self):
        """Iterate over the columns through the iterator collaborator."""
        return self.iterator.itercolumns(self)

    def add_column(self, values, column_name, **kwargs):
        """Add a column through the mutator collaborator.

        Args:
            values: the values of the new column
            column_name: the name of the new column
            **kwargs: extra options forwarded untouched to the mutator
        """
        self.mutator.add_column(self, values, column_name, **kwargs)
176