| Conditions | 2 |
| Total Lines | 60 |
| Code Lines | 28 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
| 1 | from glob import glob |
||
@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    Steps performed on the observations held by ``somagic._data_manager``:

    1. Load the sample observations via ``read_observations``.
    2. Run the ``select_variables_command`` for the 'type' and 'flavors'
       variables, declaring the column names expected after encoding.
    3. Run the ``one_hot_encoding_command`` on 'type' and the
       ``one_hot_encoding_list_command`` on 'flavors'.
    4. Store the encoded columns as a numpy array on
       ``somagic.dataset.feature_vectors``.

    Returns:
        tuple: ``(dataset, type_values, unique_flavors,
        max_flavors_per_datapoint, nb_columns_before)`` where
        ``unique_flavors`` is the set of all flavors found in the observations,
        ``max_flavors_per_datapoint`` is the length of the longest 'flavors'
        list and ``nb_columns_before`` is the number of observation columns
        right before the 'flavors' variable was encoded.
    """
    import numpy as np

    read_observations(somagic, sample_collaped_json)
    data_manager = somagic._data_manager

    type_values = ['hybrid', 'indica', 'sativa']
    type_columns = [f'type_{x}' for x in type_values]

    # set().union(*lists) always yields a set, even when only one (or no)
    # datapoint carries a flavors list; a pairwise reduce would hand back the
    # single list unchanged (duplicates included) or fail on empty input.
    unique_flavors = set().union(
        *(flavors for flavors in data_manager.datapoints.observations['flavors'] if flavors is not None)
    )
    # Materialize once so that the selected columns and the feature-vector
    # columns are guaranteed to share the same ordering.
    flavor_columns = list(unique_flavors)

    # Current limitations (apply to both the 'type' and 'flavors' variables):
    # 1. client code has to know the number of distinct values of the variable
    # 2. client code has to provide the column names that will result after
    #    encoding the variable
    cmd = data_manager.command.select_variables_command
    cmd.args = [[
        {'variable': 'type', 'columns': type_columns},
        {'variable': 'flavors', 'columns': flavor_columns},
    ]]
    cmd.execute()

    cmd = data_manager.command.one_hot_encoding_command
    cmd.args = [data_manager.datapoints, 'type']
    cmd.execute()

    # Sanity check: every 'flavors' entry is either a list or missing (None).
    assert {type(x) for x in data_manager.datapoints.observations['flavors']} == {list, type(None)}

    nb_columns_before = len(data_manager.datapoints.observations.columns)

    cmd = data_manager.command.one_hot_encoding_list_command
    cmd.args = [data_manager.datapoints, 'flavors']
    cmd.execute()

    # Encoding 'flavors' must add exactly one column per unique flavor.
    assert nb_columns_before + len(unique_flavors) == len(data_manager.datapoints.observations.columns)

    somagic.dataset.feature_vectors = np.array(
        data_manager.datapoints.observations[type_columns + flavor_columns]
    )

    max_flavors_per_datapoint = max(
        len(flavors)
        for flavors in data_manager.datapoints.observations['flavors']
        if isinstance(flavors, list)
    )
    return somagic.dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before
||
| 114 | |||
| 166 |