| Conditions | 2 |
| Total Lines | 75 |
| Code Lines | 55 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, particularly when each one has a good name. Moreover, the smaller a method is, the easier it usually is to find a good name for it.
For example, if you find yourself adding comments inside a method's body, this is usually a good sign that the commented part should be extracted into a new method; the comment itself is a useful starting point for naming that new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | from .data_manager import DataManager |
||
def init_data_manager(a_backend):
    """Create a ``DataManager`` on top of ``a_backend`` and attach pandas-based operations to it.

    The function builds the data manager, builds a ``DataManagerCommandFactory``
    for it and attaches the manager's command accumulator to that factory.
    It then defines:

    * two backend-engine functions (``encode_nominal_subsets`` and
      ``observations``), each passed through ``data_manager.backend.engine.dec()``
    * three command prototypes (``one_hot_encoding``, ``select_variables`` and
      ``one_hot_encoding_list``), each passed through
      ``mega_cmd_factory.build_command_prototype()``

    NOTE(review): the decorators presumably register the decorated callables
    with the engine / command factory -- confirm against their implementation.

    Args:
        a_backend: the backend object handed to the ``DataManager`` constructor.

    Returns:
        The configured ``DataManager`` instance.
    """
    # Kept function-scoped, as in the original, so that importing this module
    # does not itself require pandas/numpy.
    import numpy as np
    import pandas as pd

    from so_magic.data.encoding import NominalAttributeEncoder

    data_manager = DataManager(a_backend, type('PhiFunction', (PhiFunctionRegistrator,), {}), FeatureManager([]))
    mega_cmd_factory = DataManagerCommandFactory(data_manager)
    mega_cmd_factory.attach(data_manager.commands_manager.command.accumulator)

    @data_manager.backend.engine.dec()
    def encode_nominal_subsets(datapoints, attribute, new_attribute):
        """Compute values from ``attribute`` with a categorical phi and add them as column ``new_attribute``."""
        # Deferred import, exactly as in the original (resolved at call time).
        from so_magic.data.features.phis import ListOfCategoricalPhi, DatapointsAttributePhi
        phi = ListOfCategoricalPhi(DatapointsAttributePhi(datapoints))
        new_values = phi(attribute)
        datapoints.mutator.add_column(datapoints, new_values, new_attribute)

    @data_manager.backend.engine.dec()
    def observations(file_path):
        """Read a JSON-lines file (one JSON object per line) into a pandas DataFrame."""
        return pd.read_json(file_path, lines=True)

    class OneHotEncoder(NominalAttributeEncoder):
        """One-hot encoder for an attribute holding a single nominal value per row."""

        def encode(self, *args, **kwargs):
            """One-hot encode ``args[1]`` (attribute name) of ``args[0]`` (datapoints).

            Stores the encoded column names in ``self.columns`` and the same
            names with the ``'<attribute>_'`` prefix removed in ``self.values_set``.

            Returns:
                The DataFrame produced by ``pandas.get_dummies``.
            """
            datapoints, attribute = args[0], args[1]
            prefix_separator = '_'
            dataframe = pd.get_dummies(datapoints.observations[attribute], prefix=attribute,
                                       prefix_sep=prefix_separator, drop_first=False)
            prefix = f'{attribute}{prefix_separator}'
            # Strip only the leading prefix: str.replace would also delete any
            # later occurrence of the same text inside a category value.
            self.values_set = [column[len(prefix):] if column.startswith(prefix) else column
                               for column in dataframe.columns]
            self.columns = list(dataframe.columns)
            return dataframe

    @mega_cmd_factory.build_command_prototype()
    def one_hot_encoding(_data_manager, _datapoints, _attribute):
        """Append the one-hot columns of ``_attribute`` to the data manager's observations."""
        dataframe = OneHotEncoder().encode(_datapoints, _attribute)
        _data_manager.datapoints.observations = pd.concat([_data_manager.datapoints.observations, dataframe], axis=1)

    @mega_cmd_factory.build_command_prototype()
    def select_variables(_data_manager, variables):
        """Store ``variables`` as the feature manager's feature configuration."""
        _data_manager.feature_manager.feature_configuration = variables

    class OneHotListEncoder(NominalAttributeEncoder):
        """One-hot encoder for an attribute whose rows hold a list of nominal values."""

        # Maps a membership test result to the float written into the encoded vector.
        binary_transformer = {True: 1.0, False: 0.0}

        def encode(self, *args, **kwargs):
            """One-hot encode the list-valued attribute ``args[1]`` of ``args[0]`` (datapoints).

            ``self.values_set`` becomes the set of distinct elements found in
            the rows whose value is a list; ``self.columns`` is that set as a
            list. Rows whose value is not a list are encoded as all zeros.

            Returns:
                A DataFrame with one float column per distinct value.
            """
            datapoints, attribute = args[0], args[1]
            list_values = (value for value in datapoints.observations[attribute] if isinstance(value, list))
            # set().union(*...) always yields a set. The previous reduce() without
            # an initial value raised TypeError when no row held a list and
            # returned the raw list (duplicates included) when only one row did.
            self.values_set = set().union(*list_values)
            self.columns = list(self.values_set)
            return pd.DataFrame([self._yield_vector(datarow, attribute) for _, datarow in datapoints.iterrows()],
                                columns=self.columns)

        def _yield_vector(self, datarow, attribute):
            """Return the encoded vector of a row: real encoding for lists, zeros otherwise."""
            if isinstance(datarow[attribute], list):
                return self._encode(datarow, attribute)
            return self._encode_none(datarow, attribute)

        def _encode(self, datarow, attribute):
            """Return 1.0/0.0 per column depending on whether the column value is in the row's list."""
            return [OneHotListEncoder.binary_transformer[column in datarow[attribute]] for column in self.columns]

        def _encode_none(self, datarow, attribute):
            """Return an all-zero vector with one entry per encoded column."""
            return [0.0] * len(self.columns)

    @mega_cmd_factory.build_command_prototype()
    def one_hot_encoding_list(_data_manager, _datapoints, _attribute):
        """Append the one-hot columns of the list-valued ``_attribute`` to the observations."""
        # Assign the filled column back explicitly: an in-place fillna on a
        # chained column selection is not guaranteed to update the parent frame.
        _data_manager.datapoints.observations[_attribute] = \
            _data_manager.datapoints.observations[_attribute].fillna(value=np.nan)
        dataframe = OneHotListEncoder().encode(_datapoints, _attribute)
        _data_manager.datapoints.observations = pd.concat([_data_manager.datapoints.observations, dataframe],
                                                          axis=1)

    return data_manager
||
| 82 |