Passed
Pull Request — dev (#32)
by Konstantinos
03:37 queued 02:15
created

conftest   A

Complexity

Total Complexity 14

Size/Duplication

Total Lines 183
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 117
dl 0
loc 183
rs 10
c 0
b 0
f 0
wmc 14

12 Functions

Rating   Name   Duplication   Size   Complexity  
A sample_json() 0 3 1
A tests_data_root() 0 3 1
A sample_collaped_json() 0 3 1
A tests_root_dir() 0 3 1
A test_json_data() 0 6 1
A somagic() 0 5 1
A tabular_operators() 0 35 2
A test_dataset() 0 39 2
A built_in_backends() 0 5 1
A data_manager() 0 33 1
A test_datapoints() 0 5 1
A read_observations() 0 14 1
1
import os
2
import pytest
3
4
5
# Directory that contains this file; realpath() resolves symlinks before dirname() is taken.
my_dir = os.path.dirname(os.path.realpath(__file__))
6
7
####### Files and folders
8
@pytest.fixture
def tests_root_dir():
    """Return the module-level ``my_dir`` path (the directory holding this file)."""
    root_dir = my_dir
    return root_dir
11
12
@pytest.fixture
def tests_data_root(tests_root_dir):
    """Return the path of the 'dts' directory located directly under the tests root directory."""
    data_dir_name = 'dts'
    return os.path.join(tests_root_dir, data_dir_name)
15
16
# Test data
17
@pytest.fixture
def sample_json(tests_data_root):
    """Return the path of the 'sample-data.jsonlines' file inside the tests data directory."""
    file_name = 'sample-data.jsonlines'
    return os.path.join(tests_data_root, file_name)
20
21
@pytest.fixture
def sample_collaped_json(tests_data_root):
    """Return the path of the 'sample-data-collapsed.jsonlines' file inside the tests data directory."""
    file_name = 'sample-data-collapsed.jsonlines'
    return os.path.join(tests_data_root, file_name)
24
25
26
@pytest.fixture()
def test_json_data(sample_json):
    """Bundle the sample json-lines file path with the hard-coded facts recorded about it.

    Returns:
        dict: with keys 'file_path' (the ``sample_json`` path), 'nb_lines' (100)
            and 'attributes' (a set of attribute names).
    """
    attribute_names = {
        'flavors', 'name', 'medical', 'description',
        'image_urls', 'parents', 'negatives', 'grow_info',
        '_id', 'type', 'image_paths', 'effects',
    }
    json_data = {
        'file_path': sample_json,
        'nb_lines': 100,
        'attributes': attribute_names,
    }
    return json_data
33
34
35
@pytest.fixture
def somagic():
    """Return the object created by calling ``so_magic.init_so_magic()``."""
    # The import stays inside the fixture so it only happens when the fixture is requested.
    from so_magic import init_so_magic
    return init_so_magic()
40
41
42
@pytest.fixture
def data_manager():
    """Provide a zero-argument factory that builds a data manager on the 'pd' engine.

    Every call of the returned function creates a new data manager, asserts that its
    factories and observers are wired as expected, prints the observers of each subject
    and finally returns the data manager.
    """
    def getter():
        from so_magic.data import init_data_manager
        from so_magic.data.backend import init_engine

        manager = init_data_manager(init_engine(engine_type='pd'))

        backend = manager.engine.backend
        datapoints_factory = backend.datapoints_factory
        command_factory = backend.command_factory

        # The factories must be instances of the expected classes.
        from so_magic.data.datapoints.datapoints import DatapointsFactory
        from so_magic.data.backend.engine_command_factory import MagicCommandFactory

        assert isinstance(datapoints_factory, DatapointsFactory)
        assert isinstance(command_factory, MagicCommandFactory)

        subjects = [datapoints_factory.subject, command_factory.subject, manager.phi_class.subject]
        # No two subjects may share the same '_observers' container object.
        observers_container_ids = {id(subject._observers) for subject in subjects}
        assert len(observers_container_ids) == len(subjects)

        # The first observer of each subject must be the matching component of the manager.
        assert datapoints_factory.subject._observers[0] == manager.engine.datapoints_manager
        assert command_factory.subject._observers[0] == manager.commands_manager.command.accumulator
        assert id(manager.phi_class.subject._observers[0]) == id(manager.built_phis)
        assert manager.phi_class.subject._observers[0] == manager.built_phis

        print(f"DTP FCT OBS: [{', '.join(str(obs) for obs in datapoints_factory.subject._observers)}]")
        print(f"CMD FCT OBS: [{', '.join(str(obs) for obs in command_factory.subject._observers)}]")
        print(f"PHIFUNC class OBS: [{', '.join(str(obs) for obs in manager.phi_class.subject._observers)}]")

        # Each subject must have exactly one observer attached.
        assert all(len(subject._observers) == 1 for subject in subjects)
        return manager

    return getter
75
76
77
@pytest.fixture
def read_observations():
    """Provide a function that reads a json lines formatted file through the 'observations' command."""
    def load_data(so_master, json_lines_formatted_file_path):
        """Run the observations command of the given object on the given data file.

        Args:
            so_master (so_magic.so_master.SoMaster): an instance of SoMaster
            json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
        """
        # Hold on to one command object: it is configured first and executed afterwards.
        observations_cmd = so_master.command.observations_command
        observations_cmd.args = [json_lines_formatted_file_path]
        observations_cmd.execute()

    return load_data
91
92
93
@pytest.fixture
def test_datapoints(read_observations, sample_collaped_json, somagic):
    """Load the collapsed sample json lines file and return the resulting ``somagic.datapoints``."""
    observations_file = sample_collaped_json
    read_observations(somagic, observations_file)
    datapoints = somagic.datapoints
    return datapoints
98
99
100
@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    Steps performed:
        1. read the collapsed sample json lines file into ``somagic``
        2. run the select-variables command for the 'type' and 'flavors' variables
        3. run the one-hot encoding commands for 'type' and for 'flavors'
        4. store the encoded columns, as a numpy array, on ``somagic.dataset.feature_vectors``

    Returns:
        the ``somagic.dataset`` object, with its 'feature_vectors' attribute set
    """
    read_observations(somagic, sample_collaped_json)

    type_columns = ['type_hybrid', 'type_indica', 'type_sativa']

    # Union of all flavors found in the observations, skipping missing (None) entries.
    # set().union(*iterables) also gives a proper set when there is exactly one entry
    # and an empty set when there is none; reduce() without an initial value returned
    # the single entry unchanged in the first case and raised TypeError in the second.
    flavors_per_observation = [
        flavors for flavors in somagic._data_manager.datapoints.observations['flavors']
        if flavors is not None
    ]
    unique_flavors = set().union(*flavors_per_observation)

    cmd = somagic._data_manager.command.select_variables_command
    # current limitations (they apply to both the 'type' and the 'flavors' nominal variables):
    # 1. client code has to know the number of distinct values of the nominal variable
    # 2. client code has to provide the column names that will result after encoding the variable
    cmd.args = [[
        {'variable': 'type', 'columns': type_columns},
        {'variable': 'flavors', 'columns': list(unique_flavors)},
    ]]
    cmd.execute()

    cmd = somagic._data_manager.command.one_hot_encoding_command
    cmd.args = [somagic._data_manager.datapoints, 'type']
    cmd.execute()

    cmd = somagic._data_manager.command.one_hot_encoding_list_command
    cmd.args = [somagic._data_manager.datapoints, 'flavors']
    cmd.execute()

    import numpy as np

    # The set is not modified in between, so list(unique_flavors) yields the same
    # column order here as in the select-variables command above.
    dataset = somagic.dataset
    dataset.feature_vectors = np.array(
        somagic._data_manager.datapoints.observations[type_columns + list(unique_flavors)])

    return somagic.dataset
139
140
141
@pytest.fixture
def built_in_backends():
    """Return the object produced by ``magic_backends()`` of the pandas-handling backend module."""
    from so_magic.data.backend.panda_handling.df_backend import magic_backends
    return magic_backends()
146
147
148
@pytest.fixture
def tabular_operators(built_in_backends):
    """Describe the 'pd' implementations of the retriever, iterator and mutator operator interfaces.

    Returns:
        dict: with keys
            'operators': maps each operator interface name to a dict holding the 'class'
                registered under 'pd' and an 'interface' dict of method name -> signature string
            'reverse_dict': maps each of those classes back to its operator interface name
            'get_nb_args': callable (operator_interface_name, method_name) -> number of
                comma-separated parameters in the signature string, not counting '**kwargs'
            'required_methods': one-shot iterator of (operator interface name, method names) pairs
    """
    def pd_class(interface_name):
        # class registered under the 'pd' key for the given operator interface
        return built_in_backends.backend_interfaces[interface_name]['class_registry'].subclasses['pd']

    method_signatures = {
        'retriever': {
            'column': '(identifier, data)',
            'row': '(identifier, data)',
            'nb_columns': '(data)',
            'nb_rows': '(data)',
            'get_numerical_attributes': '(data)',
        },
        'iterator': {
            'columnnames': '(data)',
            'itercolumns': '(data)',
            'iterrows': '(data)',
        },
        'mutator': {
            'add_column': '(datapoints, values, new_attribute, **kwargs)',
        },
    }
    operators = {
        interface_name: {'class': pd_class(interface_name), 'interface': signatures}
        for interface_name, signatures in method_signatures.items()
    }

    def get_nb_args(operator_interface_name, method_name):
        signature = operators[operator_interface_name]['interface'][method_name]
        # '**kwargs' is dropped before counting the comma-separated parameters
        return len(signature.replace(', **kwargs', '').split(','))

    # operator_name_2_required_methods; both loop variables are bound by the 'for' clause
    required_methods = (
        (operator_interface_name, operator_dict['interface'].keys())
        for operator_interface_name, operator_dict in operators.items()
    )

    return {
        'operators': operators,
        'reverse_dict': {operator_dict['class']: key for key, operator_dict in operators.items()},
        'get_nb_args': get_nb_args,
        'required_methods': required_methods,
    }
184