| Conditions | 2 |
| Total Lines | 87 |
| Code Lines | 65 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, particularly when they are combined with a good name. Besides, if your method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign that you should extract the commented part into a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | from .data_manager import DataManager |
||
def init_data_manager(a_backend):
    """Create a ``DataManager`` for ``a_backend`` and wire up its encoders and commands.

    The function builds the data manager, attaches the command accumulator to a
    ``MegaCommandFactory``, registers two engine functions through
    ``data_manager.backend.engine.dec()``, registers the ``'one_hot'`` and
    ``'one_hot_list'`` encoders together with the ``'one_hot_encoding'`` and
    ``'one_hot_encoding_list'`` command factories, and finally calls
    ``build_commands`` on the mega command factory.

    Args:
        a_backend: the backend object handed to the ``DataManager`` constructor.

    Returns:
        The configured ``DataManager`` instance.
    """
    data_manager = DataManager(a_backend, type('PhiFunction', (PhiFunctionRegistrator,), {}), FeatureManager([]))
    mega_cmd_factory = MegaCommandFactory(data_manager)
    mega_cmd_factory.attach(data_manager.commands_manager.command.accumulator)

    @data_manager.backend.engine.dec()
    def encode_nominal_subsets(datapoints, attribute, new_attribute):
        """Compute categorical values for ``attribute`` and add them as column ``new_attribute``."""
        # Imported lazily inside the function, exactly as the original code does.
        from so_magic.data.features.phis import ListOfCategoricalPhi, DatapointsAttributePhi
        phi = ListOfCategoricalPhi(DatapointsAttributePhi(datapoints))
        new_values = phi(attribute)
        datapoints.mutator.add_column(datapoints, new_values, new_attribute)

    import pandas as pd

    @data_manager.backend.engine.dec()
    def observations(file_path):
        """Read the line-delimited JSON file at ``file_path`` into a pandas DataFrame."""
        return pd.read_json(file_path, lines=True)

    from so_magic.data.encoding import NominalAttributeEncoder

    @NominalAttributeEncoder.register_as_subclass('one_hot')
    class OneHotEncoder(NominalAttributeEncoder):
        """One-hot encoder for a nominal attribute, built on ``pandas.get_dummies``."""

        def encode(self, *args, **kwargs):
            """Return the dummy-variable DataFrame for an attribute.

            Args:
                *args: ``args[0]`` is the datapoints object (its ``observations``
                    is indexed by attribute) and ``args[1]`` is the attribute name.

            Returns:
                The DataFrame produced by ``pandas.get_dummies``. As a side effect,
                ``self.columns`` holds the generated column names and
                ``self.values_set`` the same names with the leading
                ``'<attribute>_'`` prefix removed.
            """
            datapoints, attribute = args[0], args[1]
            prefix_separator = '_'
            dataframe = pd.get_dummies(datapoints.observations[attribute], prefix=attribute,
                                       prefix_sep=prefix_separator, drop_first=False)
            prefix = f'{attribute}{prefix_separator}'
            self.columns = list(dataframe.columns)
            # Strip only the leading prefix; str.replace would also delete any later
            # occurrence of the prefix text inside the value itself.
            self.values_set = [column[len(prefix):] if column.startswith(prefix) else column
                               for column in self.columns]
            return dataframe

    from so_magic.data.command_factories import DataManagerCommandFactory
    from so_magic.utils import Command

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding')
    class EncodeNominalCommandFactory(DataManagerCommandFactory):
        """Factory of the command that appends one-hot columns to the observations."""

        def construct(self, *args, **kwargs) -> Command:
            """Build the command; ``args[0]`` is the data manager, ``args[1:]`` go to ``Command``."""
            _data_manager = args[0]

            def one_hot_encoding(_datapoints, _attribute):
                dataframe = OneHotEncoder().encode(_datapoints, _attribute)
                # NOTE(review): concat aligns on the index; presumably both frames share it -- confirm.
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding, '__call__', *args[1:])

    import numpy as np

    @NominalAttributeEncoder.register_as_subclass('one_hot_list')
    class OneHotListEncoder(NominalAttributeEncoder):
        """One-hot encoder for an attribute whose values are lists of labels."""

        # Maps the membership test result to the float stored in the encoded vector.
        binary_transformer = {True: 1.0, False: 0.0}

        def encode(self, *args, **kwargs):
            """Return one row of 0.0/1.0 flags per datapoint, one column per distinct label.

            Args:
                *args: ``args[0]`` is the datapoints object (providing
                    ``observations`` and ``iterrows()``) and ``args[1]`` is the
                    attribute name.

            Returns:
                A DataFrame whose columns are the distinct labels found in the
                list-valued entries of the attribute, in first-seen order. Rows
                whose value is not a list are encoded as all zeros. When no entry
                is a list the frame has no columns.
            """
            datapoints, attribute = args[0], args[1]
            list_values = [value for value in datapoints.observations[attribute] if isinstance(value, list)]
            # dict.fromkeys de-duplicates while keeping first-seen order, so the column
            # order is reproducible across runs and an empty input no longer raises.
            self.columns = list(dict.fromkeys(label for labels in list_values for label in labels))
            self.values_set = set(self.columns)
            # NOTE(review): the resulting frame gets a default RangeIndex; presumably the
            # observations use the same index when the frames are concatenated -- confirm.
            return pd.DataFrame([self._yield_vector(datarow, attribute) for _, datarow in datapoints.iterrows()],
                                columns=self.columns)

        def _yield_vector(self, datarow, attribute):
            """Encode ``datarow``: real flags for a list value, all zeros otherwise."""
            if isinstance(datarow[attribute], list):
                return self._encode(datarow, attribute)
            return self._encode_none(datarow, attribute)

        def _encode(self, datarow, attribute):
            """Return 1.0 for every column label present in the row's list, else 0.0."""
            labels = datarow[attribute]
            return [OneHotListEncoder.binary_transformer[column in labels] for column in self.columns]

        def _encode_none(self, datarow, attribute):
            """Return an all-zero vector with one entry per column."""
            return [0.0] * len(self.columns)

    @DataManagerCommandFactory.register_as_subclass('one_hot_encoding_list')
    class EncodeNominalListCommandFactory(DataManagerCommandFactory):
        """Factory of the command that appends list-based one-hot columns to the observations."""

        def construct(self, *args, **kwargs) -> Command:
            """Build the command; ``args[0]`` is the data manager, ``args[1:]`` go to ``Command``."""
            _data_manager = args[0]

            def one_hot_encoding_list(_datapoints, _attribute):
                observations_frame = _data_manager.datapoints.observations
                # Assign the result back instead of calling fillna(inplace=True) on the
                # column selection: the chained in-place form is not guaranteed to
                # modify the parent frame (it has no effect under Copy-on-Write).
                observations_frame[_attribute] = observations_frame[_attribute].fillna(value=np.nan)
                dataframe = OneHotListEncoder().encode(_datapoints, _attribute)
                _data_manager.datapoints.observations = pd.concat(
                    [_data_manager.datapoints.observations, dataframe], axis=1)

            return Command(one_hot_encoding_list, '__call__', *args[1:])

    build_commands(mega_cmd_factory)

    return data_manager
||
| 100 |