Passed
Push — mpeta ( ff9ea9...522f25 )
by Konstantinos
01:42
created

so_magic.data.init_data_manager()   B

Complexity

Conditions 2

Size

Total Lines 75
Code Lines 55

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 55
nop 1
dl 0
loc 75
rs 8.4727
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
from .data_manager import DataManager
2
from .features.phi import PhiFunctionRegistrator
3
from .features import FeatureManager
4
from .command_factories import DataManagerCommandFactory
5
6
7
def init_data_manager(a_backend):
    """Create a DataManager for the given backend and register its built-in operations.

    Two functions are registered on the backend engine (``encode_nominal_subsets``
    and ``observations``), followed by three command prototypes on the command
    factory (``one_hot_encoding``, ``select_variables`` and
    ``one_hot_encoding_list``), in that order.

    Args:
        a_backend: backend object handed to the DataManager. Its ``engine``
            attribute must provide the ``dec()`` decorator factory that is
            used to register the engine functions.

    Returns:
        The newly created DataManager instance.
    """
    # A brand-new PhiFunctionRegistrator subclass is built on every call;
    # presumably so that registrations are not shared between data managers
    # (TODO confirm against PhiFunctionRegistrator).
    phi_function_class = type('PhiFunction', (PhiFunctionRegistrator,), {})
    data_manager = DataManager(a_backend, phi_function_class, FeatureManager([]))

    mega_cmd_factory = DataManagerCommandFactory(data_manager)
    mega_cmd_factory.attach(data_manager.commands_manager.command.accumulator)

    # The registration order below mirrors the order of the original single function.
    _register_engine_functions(data_manager)
    _register_one_hot_encoding(mega_cmd_factory)
    _register_select_variables(mega_cmd_factory)
    _register_one_hot_encoding_list(mega_cmd_factory)
    return data_manager


def _register_engine_functions(data_manager):
    """Register 'encode_nominal_subsets' and 'observations' on the backend engine."""
    import pandas as pd

    @data_manager.backend.engine.dec()
    def encode_nominal_subsets(datapoints, attribute, new_attribute):
        # Imported lazily, exactly as the original code did
        # (presumably to avoid an import cycle -- TODO confirm).
        from so_magic.data.features.phis import ListOfCategoricalPhi, DatapointsAttributePhi
        phi = ListOfCategoricalPhi(DatapointsAttributePhi(datapoints))
        new_values = phi(attribute)
        datapoints.mutator.add_column(datapoints, new_values, new_attribute)

    @data_manager.backend.engine.dec()
    def observations(file_path):
        # The file is read as line-delimited JSON (one JSON document per line).
        return pd.read_json(file_path, lines=True)


def _register_one_hot_encoding(cmd_factory):
    """Register the 'one_hot_encoding' command prototype on the command factory."""
    import pandas as pd
    from so_magic.data.encoding import NominalAttributeEncoder

    class OneHotEncoder(NominalAttributeEncoder):
        """One-hot encode an attribute with ``pandas.get_dummies``."""

        def encode(self, *args, **kwargs):
            """Return the dummy-variable dataframe of an attribute.

            Expects the datapoints as first and the attribute name as second
            positional argument. As a side effect stores the generated column
            names in ``self.columns`` and the same names without their
            ``<attribute>_`` prefix in ``self.values_set``.
            """
            datapoints = args[0]
            attribute = args[1]
            prefix_separator = '_'
            dataframe = pd.get_dummies(datapoints.observations[attribute], prefix=attribute,
                                       prefix_sep=prefix_separator, drop_first=False)
            prefix = f'{attribute}{prefix_separator}'
            # Strip only the leading prefix: str.replace would also delete
            # occurrences of the prefix inside the category value itself.
            self.values_set = [column[len(prefix):] if column.startswith(prefix) else column
                               for column in dataframe.columns]
            self.columns = list(dataframe.columns)
            return dataframe

    @cmd_factory.build_command_prototype()
    def one_hot_encoding(_data_manager, _datapoints, _attribute):
        # Append the dummy columns to the right of the existing observations.
        dataframe = OneHotEncoder().encode(_datapoints, _attribute)
        _data_manager.datapoints.observations = pd.concat([_data_manager.datapoints.observations, dataframe], axis=1)


def _register_select_variables(cmd_factory):
    """Register the 'select_variables' command prototype on the command factory."""

    @cmd_factory.build_command_prototype()
    def select_variables(_data_manager, variables):
        _data_manager.feature_manager.feature_configuration = variables


def _register_one_hot_encoding_list(cmd_factory):
    """Register the 'one_hot_encoding_list' command prototype on the command factory."""
    import numpy as np
    import pandas as pd
    from so_magic.data.encoding import NominalAttributeEncoder

    class OneHotListEncoder(NominalAttributeEncoder):
        """One-hot encode an attribute whose values are lists of categories."""

        binary_transformer = {True: 1.0, False: 0.0}

        def encode(self, *args, **kwargs):
            """Return a dataframe with one 0.0/1.0 column per distinct list element.

            Expects the datapoints as first and the attribute name as second
            positional argument. Rows whose value is not a list are encoded as
            all zeros. As a side effect stores the distinct elements in
            ``self.values_set`` and the column order in ``self.columns``.
            """
            datapoints = args[0]
            attribute = args[1]
            list_values = [value for value in datapoints.observations[attribute] if isinstance(value, list)]
            # set().union(*...) always yields a set: an empty one when no row
            # holds a list (reduce() without initial value raised TypeError)
            # and a duplicate-free one when exactly one row holds a list
            # (reduce() returned that raw list untouched).
            self.values_set = set().union(*list_values)
            self.columns = list(self.values_set)
            return pd.DataFrame([self._yield_vector(datarow, attribute) for _, datarow in datapoints.iterrows()],
                                columns=self.columns)

        def _yield_vector(self, datarow, attribute):
            """Encode one row; non-list values get the all-zeros vector."""
            if isinstance(datarow[attribute], list):
                return self._encode(datarow, attribute)
            return self._encode_none(datarow, attribute)

        def _encode(self, datarow, attribute):
            """Return 1.0 for every column present in the row's list, else 0.0."""
            # Elements are hashable (they were collected into values_set), so
            # a set gives constant-time membership tests per column.
            present = set(datarow[attribute])
            return [OneHotListEncoder.binary_transformer[column in present] for column in self.columns]

        def _encode_none(self, datarow, attribute):
            """Return the all-zeros vector, one entry per output column."""
            return [0.0] * len(self.columns)

    @cmd_factory.build_command_prototype()
    def one_hot_encoding_list(_data_manager, _datapoints, _attribute):
        observations = _data_manager.datapoints.observations
        # Assign the result back instead of calling fillna(inplace=True) on a
        # column selection: that is chained assignment, which newer pandas
        # versions warn about and which does not modify the frame under
        # copy-on-write.
        observations[_attribute] = observations[_attribute].fillna(value=np.nan)
        dataframe = OneHotListEncoder().encode(_datapoints, _attribute)
        _data_manager.datapoints.observations = pd.concat([_data_manager.datapoints.observations, dataframe],
                                                          axis=1)
82