Completed
Push — appveyor ( 280314...2c0e2c )
by Konstantinos
02:09
created

so_magic.data.init_data_manager()   B

Complexity

Conditions 2

Size

Total Lines 91
Code Lines 67

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 67
nop 1
dl 0
loc 91
rs 8.08
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: moving a cohesive part of the long method into its own, well-named method.

1
from .data_manager import DataManager
2
from .features.phi import PhiFunctionRegistrator
3
from .features import FeatureManager
4
from .command_factories import MegaCommandFactory
5
6
7
def init_data_manager(a_backend):
    """Create a DataManager on top of ``a_backend`` and set up its commands.

    The function builds the DataManager, attaches the manager's command
    accumulator to a MegaCommandFactory and then, in this order, requests the
    'select_variables', 'one_hot_encoding' and 'one_hot_encoding_list'
    commands from that factory. Along the way it defines and registers two
    NominalAttributeEncoder subclasses ('one_hot', 'one_hot_list') and two
    DataManagerCommandFactory subclasses ('one_hot_encoding',
    'one_hot_encoding_list').

    NOTE(review): the statement order is kept exactly as it was; the
    decorators and factory calls look order-dependent (registration has to
    happen before the matching ``mega_cmd_factory(...)`` call) -- confirm
    before reordering anything.

    Args:
        a_backend: backend object handed to the DataManager constructor.

    Returns:
        The initialized DataManager instance.
    """
    import numpy as np
    import pandas as pd

    data_manager = DataManager(a_backend, type('PhiFunction', (PhiFunctionRegistrator,), {}), FeatureManager([]))
    mega_cmd_factory = MegaCommandFactory(data_manager)
    mega_cmd_factory.attach(data_manager.commands_manager.command.accumulator)

    mega_cmd_factory('select_variables')

    # presumably `engine.dec()` registers the decorated function with the
    # backend engine as a command -- verify against the engine implementation.
    @data_manager.backend.engine.dec()
    def encode_nominal_subsets(datapoints, attribute, new_attribute):
        """Compute new values for `attribute` with a phi function and add them as column `new_attribute`."""
        # Imported lazily, exactly as in the original code.
        from so_magic.data.features.phis import ListOfCategoricalPhi, DatapointsAttributePhi
        phi = ListOfCategoricalPhi(DatapointsAttributePhi(datapoints))
        new_values = phi(attribute)
        datapoints.mutator.add_column(datapoints, new_values, new_attribute)

    @data_manager.backend.engine.dec()
    def observations(file_path):
        """Read a JSON-lines file into a pandas DataFrame."""
        return pd.read_json(file_path, lines=True)

    from so_magic.data.encoding import NominalAttributeEncoder

    @NominalAttributeEncoder.register_as_subclass('one_hot')
    class OneHotEncoder(NominalAttributeEncoder):
        """One-hot encoder for an attribute holding a single category per row."""

        def encode(self, *args, **kwargs):
            """Return the dummy-variable DataFrame for ``args[1]`` of ``args[0]``.

            Side effects: sets ``self.columns`` to the generated column names
            and ``self.values_set`` to the same names with the leading
            '<attribute>_' prefix stripped.
            """
            datapoints = args[0]
            attribute = args[1]
            prefix_separator = '_'
            # Use the same separator for building and for stripping the prefix.
            dataframe = pd.get_dummies(datapoints.observations[attribute], prefix=attribute,
                                       prefix_sep=prefix_separator, drop_first=False)
            prefix = f'{attribute}{prefix_separator}'
            self.columns = list(dataframe.columns)
            # Strip only the leading prefix; `str.replace` would also delete
            # occurrences of the prefix inside the category value itself.
            self.values_set = [name[len(prefix):] if name.startswith(prefix) else name
                               for name in self.columns]
            return dataframe

    from so_magic.data.command_factories import DataManagerCommandFactory
    from so_magic.utils import Command

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding')
    class EncodeNominalCommandFactory(DataManagerCommandFactory):
        """Builds the command that appends one-hot columns to the observations."""

        def construct(self, *args, **kwargs) -> Command:
            """Return a Command wrapping the encoding routine.

            ``args[0]`` is the data manager whose observations get extended;
            the remaining positional arguments are forwarded to Command.
            """
            _data_manager = args[0]

            def one_hot_encoding(_datapoints, _attribute):
                dataframe = OneHotEncoder().encode(_datapoints, _attribute)
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding, '__call__', *args[1:])

    mega_cmd_factory('one_hot_encoding')

    @NominalAttributeEncoder.register_as_subclass('one_hot_list')
    class OneHotListEncoder(NominalAttributeEncoder):
        """One-hot encoder for an attribute holding a list of categories per row."""

        # Maps a membership test result to the value written in the vector.
        binary_transformer = {True: 1.0, False: 0.0}

        def encode(self, *args, **kwargs):
            """Return a DataFrame with one column per distinct list element.

            Rows whose attribute value is not a list yield an all-zero vector.
            Side effects: sets ``self.values_set`` and ``self.columns``.
            """
            datapoints = args[0]
            attribute = args[1]
            # `set().union(*...)` always yields a set: it copes with zero list
            # rows (a bare `reduce` raises TypeError) and with a single list
            # row (a bare `reduce` returns that list, duplicates included).
            self.values_set = set().union(
                *(value for value in datapoints.observations[attribute] if isinstance(value, list)))
            self.columns = list(self.values_set)
            return pd.DataFrame(
                [self._yield_vector(datarow, attribute) for _, datarow in datapoints.iterrows()],
                columns=self.columns)

        def _yield_vector(self, datarow, attribute):
            """Encode a row: membership vector for list values, zeros otherwise."""
            if isinstance(datarow[attribute], list):
                return self._encode(datarow, attribute)
            return self._encode_none(datarow, attribute)

        def _encode(self, datarow, attribute):
            """Return 1.0/0.0 per column depending on membership in the row's list."""
            row_values = datarow[attribute]
            return [OneHotListEncoder.binary_transformer[column in row_values] for column in self.columns]

        def _encode_none(self, datarow, attribute):
            """Return an all-zero vector, one entry per encoded column."""
            return [0.0] * len(self.columns)

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding_list')
    class EncodeNominalListCommandFactory(DataManagerCommandFactory):
        """Builds the command that appends list-based one-hot columns."""

        def construct(self, *args, **kwargs) -> Command:
            """Return a Command wrapping the list-encoding routine.

            ``args[0]`` is the data manager whose observations get extended;
            the remaining positional arguments are forwarded to Command.
            """
            _data_manager = args[0]

            def one_hot_encoding_list(_datapoints, _attribute):
                current = _data_manager.datapoints.observations
                # Assign the filled column back instead of calling
                # `fillna(..., inplace=True)` on a column selection, which
                # is not guaranteed to update the parent frame.
                current[_attribute] = current[_attribute].fillna(value=np.nan)
                dataframe = OneHotListEncoder().encode(_datapoints, _attribute)
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding_list, '__call__', *args[1:])

    mega_cmd_factory('one_hot_encoding_list')

    return data_manager
98