| Conditions | 2 |
| Total Lines | 91 |
| Code Lines | 67 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, this is usually a good sign that the commented part should be extracted into a new method; the comment is then a good starting point when coming up with a name for that new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | from .data_manager import DataManager |
||
def init_data_manager(a_backend):
    """Create a DataManager for ``a_backend`` and register the built-in data commands.

    Besides constructing the manager, this function:

    * attaches a ``MegaCommandFactory`` to the manager's command accumulator and
      uses it to register the ``select_variables``, ``one_hot_encoding`` and
      ``one_hot_encoding_list`` commands;
    * decorates ``encode_nominal_subsets`` and ``observations`` with the
      backend engine's ``dec()`` decorator;
    * registers the ``one_hot`` and ``one_hot_list`` nominal encoders and the
      matching command factories.

    Args:
        a_backend: backend object handed to ``DataManager``; it must expose
            ``engine.dec()`` (reached through ``data_manager.backend``).

    Returns:
        The configured ``DataManager`` instance.
    """
    data_manager = DataManager(a_backend, type('PhiFunction', (PhiFunctionRegistrator,), {}), FeatureManager([]))
    mega_cmd_factory = MegaCommandFactory(data_manager)
    mega_cmd_factory.attach(data_manager.commands_manager.command.accumulator)

    mega_cmd_factory('select_variables')

    @data_manager.backend.engine.dec()
    def encode_nominal_subsets(datapoints, attribute, new_attribute):
        """Compute a phi over ``attribute`` and store the result as column ``new_attribute``."""
        # Imported lazily, at call time, exactly as in the original code.
        from so_magic.data.features.phis import ListOfCategoricalPhi, DatapointsAttributePhi
        phi = ListOfCategoricalPhi(DatapointsAttributePhi(datapoints))
        new_values = phi(attribute)
        datapoints.mutator.add_column(datapoints, new_values, new_attribute)

    import pandas as pd

    @data_manager.backend.engine.dec()
    def observations(file_path):
        """Read a JSON-lines file (one JSON document per line) into a DataFrame."""
        return pd.read_json(file_path, lines=True)

    from so_magic.data.encoding import NominalAttributeEncoder

    @NominalAttributeEncoder.register_as_subclass('one_hot')
    class OneHotEncoder(NominalAttributeEncoder):
        """One-hot encoder for an attribute holding a single value per row."""

        def encode(self, *args, **kwargs):
            """Return the dummy-variable DataFrame for one attribute.

            Positional arguments are ``(datapoints, attribute)``. As a side
            effect ``self.columns`` is set to the generated column names and
            ``self.values_set`` to the same names without the
            ``'<attribute>_'`` prefix.
            """
            datapoints = args[0]
            attribute = args[1]
            prefix_separator = '_'
            dataframe = pd.get_dummies(datapoints.observations[attribute], prefix=attribute,
                                       prefix_sep=prefix_separator, drop_first=False)
            prefix = f'{attribute}{prefix_separator}'
            # Strip only the leading prefix: str.replace would also remove the
            # same text wherever it happens to occur inside a value.
            self.values_set = [
                column[len(prefix):] if column.startswith(prefix) else column
                for column in dataframe.columns
            ]
            self.columns = list(dataframe.columns)
            return dataframe

    from so_magic.data.command_factories import DataManagerCommandFactory
    from so_magic.utils import Command

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding')
    class EncodeNominalCommandFactory(DataManagerCommandFactory):
        """Builds the command that appends one-hot columns to the observations."""

        def construct(self, *args, **kwargs) -> Command:
            """Return a Command wrapping the encoder; ``args[0]`` is the data manager.

            The remaining positional arguments are forwarded to ``Command``.
            """
            _data_manager = args[0]

            def one_hot_encoding(_datapoints, _attribute):
                dataframe = OneHotEncoder().encode(_datapoints, _attribute)
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding, '__call__', *args[1:])

    mega_cmd_factory('one_hot_encoding')

    import numpy as np

    @NominalAttributeEncoder.register_as_subclass('one_hot_list')
    class OneHotListEncoder(NominalAttributeEncoder):
        """One-hot encoder for an attribute whose rows hold lists of values."""

        # Maps a membership test result to the float stored in the encoded row.
        binary_transformer = {True: 1.0, False: 0.0}

        def encode(self, *args, **kwargs):
            """Return a DataFrame with one 0.0/1.0 column per distinct list element.

            Positional arguments are ``(datapoints, attribute)``. Rows whose
            value is not a list are encoded as all zeros. ``self.values_set``
            and ``self.columns`` are set as a side effect.
            """
            datapoints = args[0]
            attribute = args[1]
            list_values = [value for value in datapoints.observations[attribute] if isinstance(value, list)]
            # set().union(*...) always produces a set: it copes with zero list
            # rows (no columns) and with a single list row (duplicates removed),
            # both of which an initial-value-less reduce() got wrong.
            self.values_set = set().union(*list_values)
            self.columns = list(self.values_set)
            return pd.DataFrame([self._yield_vector(datarow, attribute) for _, datarow in datapoints.iterrows()],
                                columns=self.columns)

        def _yield_vector(self, datarow, attribute):
            """Encode one row, falling back to zeros when the value is not a list."""
            if isinstance(datarow[attribute], list):
                return self._encode(datarow, attribute)
            return self._encode_none(datarow, attribute)

        def _encode(self, datarow, attribute):
            """Return 1.0 for every column present in the row's list, else 0.0."""
            present = datarow[attribute]
            return [OneHotListEncoder.binary_transformer[column in present] for column in self.columns]

        def _encode_none(self, datarow, attribute):
            """Return an all-zero vector with one entry per known value."""
            return [0.0] * len(self.values_set)

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding_list')
    class EncodeNominalListCommandFactory(DataManagerCommandFactory):
        """Builds the command that appends list-based one-hot columns to the observations."""

        def construct(self, *args, **kwargs) -> Command:
            """Return a Command wrapping the encoder; ``args[0]`` is the data manager.

            The remaining positional arguments are forwarded to ``Command``.
            """
            _data_manager = args[0]

            def one_hot_encoding_list(_datapoints, _attribute):
                observed = _data_manager.datapoints.observations
                # Normalise missing values to NaN through an explicit column
                # assignment; an in-place fillna on a column selection is a
                # chained operation that may silently act on a copy.
                observed[_attribute] = observed[_attribute].fillna(value=np.nan)
                dataframe = OneHotListEncoder().encode(_datapoints, _attribute)
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding_list, '__call__', *args[1:])

    mega_cmd_factory('one_hot_encoding_list')

    return data_manager
||
| 98 |