Passed
Push — mpeta ( 62640f...eed483 )
by Konstantinos
01:41
created

tests.conftest.test_dataset()   B

Complexity

Conditions 4

Size

Total Lines 72
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 44
nop 3
dl 0
loc 72
rs 8.824
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
from glob import glob
2
import pytest
3
4
5
def file_path_to_module_path(string: str) -> str:
    """Convert a path to a Python source file into a dotted module path.

    Only a trailing ``.py`` suffix is removed. Removing every ``.py``
    occurrence (after separators have become dots) would corrupt paths such as
    ``tests/pytools/a.py``, whose directory name starts with ``py``.
    Both ``/`` and the backslash are treated as path separators.

    Args:
        string: file path, e.g. ``tests/fixtures/foo.py``

    Returns:
        The dotted module path, e.g. ``tests.fixtures.foo``
    """
    suffix = ".py"
    if string.endswith(suffix):
        # Strip the extension first, while it is still unambiguous.
        string = string[:-len(suffix)]
    return string.replace("/", ".").replace("\\", ".")
7
8
9
# Register every python file found under tests/fixtures as a pytest plugin,
# skipping any path that contains a double underscore (e.g. __init__.py).
pytest_plugins = list(map(
    file_path_to_module_path,
    (path for path in glob("tests/fixtures/*.py") if "__" not in path),
))
12
13
14
@pytest.fixture
def somagic():
    """Provide the object returned by ``so_magic.init_so_magic()``."""
    # Imported inside the fixture, like the other project imports in this file.
    from so_magic import init_so_magic
    return init_so_magic()
19
20
21
@pytest.fixture
def data_manager():
    """Provide a zero-argument factory that builds a data manager on a 'pd' engine."""
    def build_data_manager():
        from so_magic.data import init_data_manager
        from so_magic.data.backend import init_engine
        engine = init_engine(engine_type='pd')
        return init_data_manager(engine)
    return build_data_manager
29
30
31
@pytest.fixture
def read_observations():
    """Provide a callable that reads a json lines file through the 'observations' command."""
    def load_data(so_master, json_lines_formatted_file_path):
        """Run the observations command of the given master object on a data file.

        Args:
            so_master (so_magic.so_master.SoMaster): an instance of SoMaster
            json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
        """
        observations_command = so_master.command.observations_command
        observations_command.args = [json_lines_formatted_file_path]
        observations_command.execute()
    return load_data
45
46
47
@pytest.fixture
def test_datapoints(read_observations, sample_collaped_json, somagic):
    """Load the json lines 'test file' (the 'test observations') and return the resulting datapoints."""
    read_observations(somagic, sample_collaped_json)
    datapoints = somagic.datapoints
    return datapoints
52
53
54
def _nominal_variable(name, data_type):
    """Build a minimal stand-in object describing a nominal variable.

    The returned instance exposes ``type`` (always 'nominal'), ``data_type``
    and a ``__str__`` that yields the given variable name.
    """
    return type('Variable', (object,), {
        'type': 'nominal',
        'data_type': data_type,
        '__str__': lambda self: name,
    })()


def _execute_command(command, args):
    """Set the arguments of a command object and then execute it."""
    command.args = args
    command.execute()


@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    The fixture loads the test observations, encodes the nominal 'type' and
    'flavors' variables, selects the encoded columns and stores them as the
    dataset's ``feature_vectors`` (a numpy array).

    Returns:
        tuple: the dataset, the list of 'type' values, the set of unique
        flavors, the maximum number of flavors found on a single datapoint and
        the number of columns present before the 'flavors' variable was encoded.
    """
    read_observations(somagic, sample_collaped_json)

    def datapoints():
        # Re-read on every use rather than cached: the commands executed
        # below modify the observations (the column count grows).
        return somagic._data_manager.datapoints

    type_values = ['hybrid', 'indica', 'sativa']
    expected_feature_names = [f'type_{x}' for x in type_values]

    # Starting the union from an empty set guarantees a set result even when
    # there is only one (or no) non-None flavors list.
    unique_flavors = set().union(
        *(flavors for flavors in datapoints().observations['flavors'] if flavors is not None))

    variables = type('Variables', (object,), {
        'type': _nominal_variable('type', str),
        'flavors': _nominal_variable('flavors', list),
    })

    assert len(datapoints()) == 100
    assert all(x not in datapoints().attributes for x in expected_feature_names)

    _execute_command(somagic._data_manager.command.encode_command, [variables.type])

    # The encoded 'type' columns are expected to be the last attributes.
    runtime_feature_names = list(datapoints().attributes)[-len(expected_feature_names):]
    assert runtime_feature_names == expected_feature_names

    _execute_command(somagic._data_manager.command.replace_empty_command, [variables.flavors])

    assert {type(x) for x in datapoints().observations['flavors']} == {list}

    nb_columns_before = len(datapoints().observations.columns)

    _execute_command(somagic._data_manager.command.encode_command, [variables.flavors])

    assert nb_columns_before + len(unique_flavors) == len(datapoints().observations.columns)

    # current limitations (they hold for both the 'type' and 'flavors' variables):
    # 1. client code has to know the number of distinct values of the nominal variable
    # 2. client code has to provide the column names that will result after encoding the variable
    _execute_command(somagic._data_manager.command.select_variables_command, [[
        {'variable': 'type', 'columns': runtime_feature_names},
        {'variable': 'flavors', 'columns': list(unique_flavors)},
    ]])

    import numpy as np
    feature_columns = runtime_feature_names + ['flavors_' + x for x in unique_flavors]
    somagic.dataset.feature_vectors = np.array(datapoints().observations[feature_columns])

    max_flavors_per_datapoint = max(
        len(x) for x in datapoints().observations['flavors'] if isinstance(x, list))
    return somagic.dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before
126
127
128
@pytest.fixture
def built_in_backends():
    """Provide the object returned by the pandas backend module's ``magic_backends()``."""
    from so_magic.data.backend.panda_handling.df_backend import magic_backends
    return magic_backends()
133
134
135
@pytest.fixture
def tabular_operators(built_in_backends):
    """Describe the 'pd' tabular operators (retriever, iterator, mutator) and their interfaces.

    Returns:
        dict: with keys
            'operators': per operator name, its registered 'pd' class and a
                mapping of method name to a signature string,
            'reverse_dict': operator class mapped back to operator name,
            'get_nb_args': callable (operator name, method name) giving the
                number of comma-separated arguments in the signature string,
                not counting ``**kwargs``,
            'required_methods': one-shot iterator of
                (operator name, method names) pairs.
    """
    interface_signatures = {
        'retriever': {
            'column': '(identifier, data)',
            'row': '(identifier, data)',
            'nb_columns': '(data)',
            'nb_rows': '(data)',
            'get_numerical_attributes': '(data)',
        },
        'iterator': {
            'columnnames': '(data)',
            'itercolumns': '(data)',
            'iterrows': '(data)',
        },
        'mutator': {
            'add_column': '(datapoints, values, new_attribute, **kwargs)',
            'add_columns': '(datapoints, values, column_names, **kwargs)',
        },
    }
    operators = {
        name: {
            'class': built_in_backends.backend_interfaces[name]['class_registry'].subclasses['pd'],
            'interface': signatures,
        }
        for name, signatures in interface_signatures.items()
    }

    def get_nb_args(operator_interface_name, method_name):
        signature = operators[operator_interface_name]['interface'][method_name]
        # '**kwargs' is dropped before counting the comma-separated names.
        return len(signature.replace(', **kwargs', '').split(','))

    return {
        'operators': operators,
        'reverse_dict': {spec['class']: name for name, spec in operators.items()},
        'get_nb_args': get_nb_args,
        # operator_name_2_required_methods
        'required_methods': ((name, spec['interface'].keys()) for name, spec in operators.items()),
    }
172
173
174
@pytest.fixture
def assert_different_objects():
    """Provide a helper asserting that no two of the given objects are the same object."""
    def _assert_different_objects(objects):
        # Identity (id), not equality, decides whether two entries are distinct.
        unique_ids = {id(obj) for obj in objects}
        assert len(unique_ids) == len(objects)
    return _assert_different_objects
179