Passed
Push — dev ( 1b3874...6a1e3c )
by Konstantinos
03:33
created

FeatureDiscretizerFactory.numerical()   A

Complexity

Conditions 2

Size

Total Lines 5
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 5
rs 10
c 0
b 0
f 0
cc 2
nop 3
1
from abc import ABC
2
import attr
3
import copy
4
5
6
class DiscretizerInterface(ABC):
    """Interface for objects that expose a ``discretize`` operation."""

    def discretize(self, *args, **kwargs):
        """Discretize the given input; subclasses must override this.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
9
10
11
class AbstractDiscretizer(DiscretizerInterface):
    """Base class of the discretizers; provides no behaviour of its own."""

    def discretize(self, *args, **kwargs):
        """Discretize the given input; subclasses must override this.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
14
15
16
@attr.s
class BaseDiscretizer(AbstractDiscretizer):
    """Discretizer that delegates the actual binning to a callable."""

    # Callable invoked as ``binner(values, nb_bins)`` by ``discretize``.
    binner = attr.ib(init=True)

    # The validator decorator belongs to the attribute defined above; the
    # builtin ``bin`` has no ``validator`` and would fail at class creation.
    @binner.validator
    def validate_bin_function(self, attribute, value):
        """Reject a ``binner`` value that is not callable.

        Raises:
            ValueError: if ``value`` is not callable.
        """
        if not callable(value):
            raise ValueError(f'Expected a callable object, instead a {type(value).__name__} was given.')

    def discretize(self, *args, **kwargs):
        """Bin the values a feature yields for a dataset.

        Expects positional args, in order: dataset, feature, nb_bins.
        Keyword arguments are accepted but not used.

        Returns:
            Whatever ``self.binner(feature.values(dataset), nb_bins)`` returns.

        Raises:
            IndexError: if fewer than three positional arguments are given.
        """
        dataset, feature, nb_bins = args[0], args[1], args[2]
        return self.binner(feature.values(dataset), nb_bins)
28
29
30
@attr.s
class FeatureDiscretizer(BaseDiscretizer):
    """Discretizer bound to one specific feature."""

    # Feature handed to ``BaseDiscretizer.discretize`` on every call.
    feature = attr.ib(init=True)

    def discretize(self, *args, **kwargs):
        """Bin the values the bound feature yields for a dataset.

        Expects positional args, in order: dataset, nb_bins.
        Keyword arguments are accepted but not used.

        Raises:
            IndexError: if fewer than two positional arguments are given.
        """
        dataset, nb_bins = args[0], args[1]
        return super().discretize(dataset, self.feature, nb_bins)
37
38
@attr.s
class FeatureDiscretizerFactory:
    """Builds ``FeatureDiscretizer`` objects with binners from a binner factory."""

    # Object whose ``create_binner(binner_type)`` supplies the binner.
    binner_factory = attr.ib(init=True)

    def categorical(self, feature, **kwargs) -> FeatureDiscretizer:
        """Create a discretizer for ``feature``.

        Pass ``quantisized=True`` to request the 'quantisized' binner type
        instead of the default 'same-length' one.
        """
        return self._discretizer_for(feature, kwargs)

    def numerical(self, feature, **kwargs) -> FeatureDiscretizer:
        """Create a discretizer for ``feature``.

        Pass ``quantisized=True`` to request the 'quantisized' binner type
        instead of the default 'same-length' one.
        """
        return self._discretizer_for(feature, kwargs)

    def _discretizer_for(self, feature, options) -> FeatureDiscretizer:
        """Shared construction logic of ``categorical`` and ``numerical``."""
        binner_type = 'quantisized' if options.get('quantisized', False) else 'same-length'
        return FeatureDiscretizer(self.binner_factory.create_binner(binner_type), feature)
53
54
55
#########################################
56
57
class BinnerInterface(ABC):
    """Interface for objects that expose a ``bin`` operation."""

    def bin(self, values, nb_bins):
        """Bin ``values`` into ``nb_bins`` bins; subclasses must override this.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
60
61
62
class BaseBinner(BinnerInterface):
    """Base class of the binners; provides no behaviour of its own."""

    def bin(self, values, nb_bins):
        """Bin ``values`` into ``nb_bins`` bins; subclasses must override this.

        It is assumed that the variable is numerical (ratio or interval) or
        an ordinal (not nominal) categorical one.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError
67
68
69
class BinnerFactory:
    """Factory that keeps a registry of classes keyed by backend type."""

    # Registry mapping a backend type to the class registered for it. It has
    # to exist on the class, otherwise the first registration or lookup
    # raises AttributeError.
    subclasses = {}

    @classmethod
    def register_as_subclass(cls, backend_type):
        """Return a class decorator that registers a class under ``backend_type``.

        The decorator stores the class in ``cls.subclasses`` and returns it
        unchanged.
        """
        def wrapper(subclass):
            cls.subclasses[backend_type] = subclass
            return subclass
        return wrapper

    @classmethod
    def create(cls, backend_type, *args, **kwargs):
        """Instantiate the class registered under ``backend_type``.

        Positional and keyword arguments are forwarded to the registered
        class.

        Raises:
            ValueError: if nothing is registered under ``backend_type``.
        """
        if backend_type not in cls.subclasses:
            raise ValueError('Bad "BinnerFactory Backend type" type \'{}\''.format(backend_type))
        return cls.subclasses[backend_type](*args, **kwargs)

    def equal_length_binner(self, *args, **kwargs) -> 'BaseBinner':
        """Not implemented here; always raises NotImplementedError."""
        raise NotImplementedError

    def quantisized_binner(self, *args, **kwargs) -> 'BaseBinner':
        """Not implemented here; always raises NotImplementedError."""
        raise NotImplementedError

    def create_binner(self, *args, **kwargs) -> 'BaseBinner':
        """Not implemented here; always raises NotImplementedError."""
        raise NotImplementedError
90
91