| Conditions | 4 |
| Total Lines | 72 |
| Code Lines | 44 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign that the commented part should be extracted into a new method; the comment itself is a useful starting point for coming up with a good name for that method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | from glob import glob |
||
@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    Loads the sample observations into ``somagic``, runs the data manager's
    encode / replace-empty / select-variables commands on the nominal 'type'
    and 'flavors' variables, and stores the resulting matrix on
    ``somagic.dataset.feature_vectors``.

    Returns:
        A 5-tuple ``(dataset, type_values, unique_flavors,
        max_flavors_per_datapoint, nb_columns_before)`` where

        * ``dataset`` is ``somagic.dataset`` with ``feature_vectors`` set,
        * ``type_values`` is the list of expected values of the 'type' variable,
        * ``unique_flavors`` is the set of distinct flavors found in the observations,
        * ``max_flavors_per_datapoint`` is the length of the longest 'flavors' list,
        * ``nb_columns_before`` is the number of observation columns right
          before the 'flavors' variable was encoded.
    """
    import numpy as np

    read_observations(somagic, sample_collaped_json)

    type_values = ['hybrid', 'indica', 'sativa']
    expected_feature_names = [f'type_{x}' for x in type_values]

    # Union into a fresh set so the result is always a set, even when only one
    # (or no) datapoint carries a flavors list; a reduce() without initializer
    # would hand back the raw list (or raise) in those cases.
    unique_flavors = set().union(
        *(flavors for flavors in somagic._data_manager.datapoints.observations['flavors']
          if flavors is not None)
    )

    # Lightweight stand-ins for the variable objects that the commands receive;
    # they expose 'type', 'data_type' and a string representation.
    variables = type('Variables', (object,), {
        'type': type('Variable', (object,), {
            'type': 'nominal',
            'data_type': str,
            '__str__': lambda self: 'type',
        })(),
        'flavors': type('Variable', (object,), {
            'type': 'nominal',
            'data_type': list,
            '__str__': lambda self: 'flavors',
        })(),
    })

    assert len(somagic._data_manager.datapoints) == 100
    assert all(x not in somagic._data_manager.datapoints.attributes for x in expected_feature_names)

    # Encode the 'type' variable; the new attributes are expected at the tail.
    cmd = somagic._data_manager.command.encode_command
    cmd.args = [variables.type]
    cmd.execute()

    runtime_feature_names = list(somagic._data_manager.datapoints.attributes)[-len(expected_feature_names):]
    assert runtime_feature_names == expected_feature_names

    # Replace empty 'flavors' entries so that every value is a list.
    cmd = somagic._data_manager.command.replace_empty_command
    cmd.args = [variables.flavors]
    cmd.execute()

    assert {type(x) for x in somagic._data_manager.datapoints.observations['flavors']} == {list}

    nb_columns_before = len(somagic._data_manager.datapoints.observations.columns)

    # Encode the 'flavors' variable; one column per distinct flavor is expected.
    cmd = somagic._data_manager.command.encode_command
    cmd.args = [variables.flavors]
    cmd.execute()

    assert nb_columns_before + len(unique_flavors) == len(somagic._data_manager.datapoints.observations.columns)

    cmd = somagic._data_manager.command.select_variables_command
    # current limitations (apply to both the 'type' and the 'flavors' variable):
    # 1. client code has to know the number of distinct values for the nominal variable
    # 2. client code has to provide the column names that will result after encoding the variable
    cmd.args = [[
        {'variable': 'type', 'columns': runtime_feature_names},
        {'variable': 'flavors', 'columns': list(unique_flavors)},
    ]]
    cmd.execute()

    feature_columns = runtime_feature_names + ['flavors_' + x for x in unique_flavors]
    somagic.dataset.feature_vectors = np.array(
        somagic._data_manager.datapoints.observations[feature_columns]
    )

    max_flavors_per_datapoint = max(
        len(x) for x in somagic._data_manager.datapoints.observations['flavors']
        if isinstance(x, list)
    )
    return somagic.dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before
||
| 126 | |||
| 179 |