Passed
Push — mpeta ( b8b967...a54cf8 )
by Konstantinos
01:35
created

so_magic.data.build_commands()   A

Complexity

Conditions 2

Size

Total Lines 4
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 3
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
from .data_manager import DataManager
2
from .features.phi import PhiFunctionRegistrator
3
from .features import FeatureManager
4
from .command_factories import MegaCommandFactory
5
6
7
def build_commands(mega_cmd_factory):
    """Find all sub-factories and build command prototypes.

    Iterates over the keys of ``mega_cmd_factory.command_factory.subclasses``
    and calls ``mega_cmd_factory`` once with each key.

    Args:
        mega_cmd_factory: callable object that exposes a
            ``command_factory.subclasses`` iterable of keys.
    """
    for command_factory_key in mega_cmd_factory.command_factory.subclasses:
        mega_cmd_factory(command_factory_key)
11
12
13
def init_data_manager(a_backend):
    """Create a DataManager for the given backend and wire up its commands.

    The function builds the DataManager, attaches a MegaCommandFactory to the
    manager's command accumulator, passes two functions through the backend
    engine's ``dec()`` decorator, registers the one-hot encoders and their
    command factories, and finally calls ``build_commands``.

    Args:
        a_backend: backend object handed to ``DataManager``; it is accessed
            afterwards as ``data_manager.backend`` and must expose
            ``engine.dec()``.

    Returns:
        The configured ``DataManager`` instance.
    """
    data_manager = DataManager(a_backend, type('PhiFunction', (PhiFunctionRegistrator,), {}), FeatureManager([]))
    mega_cmd_factory = MegaCommandFactory(data_manager)
    mega_cmd_factory.attach(data_manager.commands_manager.command.accumulator)

    @data_manager.backend.engine.dec()
    def encode_nominal_subsets(datapoints, attribute, new_attribute):
        # Imported lazily, inside the function, exactly as in the original code.
        from so_magic.data.features.phis import ListOfCategoricalPhi, DatapointsAttributePhi
        phi = ListOfCategoricalPhi(DatapointsAttributePhi(datapoints))
        new_values = phi(attribute)
        datapoints.mutator.add_column(datapoints, new_values, new_attribute)

    import pandas as pd

    @data_manager.backend.engine.dec()
    def observations(file_path):
        # The file is read as JSON Lines (one JSON document per line).
        return pd.read_json(file_path, lines=True)

    from so_magic.data.encoding import NominalAttributeEncoder

    @NominalAttributeEncoder.register_as_subclass('one_hot')
    class OneHotEncoder(NominalAttributeEncoder):
        """One-hot encode a column that holds a single value per row."""

        def encode(self, *args, **kwargs):
            """Return the dummy-variable DataFrame for ``args[1]`` of ``args[0]``.

            Sets ``self.columns`` (the dummy column names) and
            ``self.values_set`` (the same names without the attribute prefix).
            """
            datapoints, attribute = args[0], args[1]
            prefix_separator = '_'
            prefix = f'{attribute}{prefix_separator}'
            dataframe = pd.get_dummies(datapoints.observations[attribute], prefix=attribute,
                                       prefix_sep=prefix_separator, drop_first=False)
            self.columns = list(dataframe.columns)
            # Strip only the leading prefix, so a value that itself contains
            # the prefix text is left intact.
            self.values_set = [column[len(prefix):] if column.startswith(prefix) else column
                               for column in self.columns]
            return dataframe

    from so_magic.data.command_factories import DataManagerCommandFactory
    from so_magic.utils import Command

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding')
    class EncodeNominalCommandFactory(DataManagerCommandFactory):
        """Build the command that appends one-hot columns to the observations."""

        def construct(self, *args, **kwargs) -> Command:
            """Return a Command wrapping the encoding; ``args[0]`` is the data manager."""
            _data_manager = args[0]

            def one_hot_encoding(_datapoints, _attribute):
                dataframe = OneHotEncoder().encode(_datapoints, _attribute)
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding, '__call__', *args[1:])

    import numpy as np

    @NominalAttributeEncoder.register_as_subclass('one_hot_list')
    class OneHotListEncoder(NominalAttributeEncoder):
        """One-hot encode a column whose cells hold lists of values."""

        binary_transformer = {True: 1.0, False: 0.0}

        def encode(self, *args, **kwargs):
            """Return a DataFrame with one 0.0/1.0 column per distinct list element.

            Rows whose cell is not a list get an all-zero vector.
            """
            datapoints, attribute = args[0], args[1]
            list_cells = [cell for cell in datapoints.observations[attribute] if isinstance(cell, list)]
            # set().union(*...) also covers zero or one list cells, where a
            # reduce() without an initial value would raise or return the raw list.
            self.values_set = set().union(*list_cells)
            self.columns = list(self.values_set)
            # NOTE(review): the frame gets a default index; confirm it lines up
            # with the observations' index when the caller concatenates them.
            return pd.DataFrame([self._yield_vector(datarow, attribute) for _, datarow in datapoints.iterrows()],
                                columns=self.columns)

        def _yield_vector(self, datarow, attribute):
            """Return the encoded vector for one row."""
            if isinstance(datarow[attribute], list):
                return self._encode(datarow, attribute)
            return self._encode_none(datarow, attribute)

        def _encode(self, datarow, attribute):
            """Return 1.0 for each column present in the row's list, else 0.0."""
            present = set(datarow[attribute])
            return [OneHotListEncoder.binary_transformer[column in present] for column in self.columns]

        def _encode_none(self, datarow, attribute):
            """Return the all-zero vector used for rows without a list."""
            return [0.0] * len(self.columns)

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding_list')
    class EncodeNominalListCommandFactory(DataManagerCommandFactory):
        """Build the command that appends list one-hot columns to the observations."""

        def construct(self, *args, **kwargs) -> Command:
            """Return a Command wrapping the encoding; ``args[0]`` is the data manager."""
            _data_manager = args[0]

            def one_hot_encoding_list(_datapoints, _attribute):
                frame = _data_manager.datapoints.observations
                # Assign the filled column back instead of a chained in-place
                # fillna, which does not update the frame under Copy-on-Write.
                frame[_attribute] = frame[_attribute].fillna(value=np.nan)
                dataframe = OneHotListEncoder().encode(_datapoints, _attribute)
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding_list, '__call__', *args[1:])

    build_commands(mega_cmd_factory)

    return data_manager
100