Passed
Pull Request — dev (#32)
by Konstantinos
02:51 (queued 01:25)

conftest.tabular_operators()   A

Complexity     Conditions: 2
Size           Total Lines: 35, Code Lines: 27
Duplication    Lines: 0, Ratio: 0 %
Importance     Changes: 0

Metric   Value
cc       2
eloc     27
nop      1
dl       0
loc      35
rs       9.232
c        0
b        0
f        0
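
The abbreviated metrics appear to be standard static-analysis figures: cc matches the cyclomatic complexity (Conditions 2), loc and eloc match Total Lines 35 and Code Lines 27, nop is presumably the number of parameters, and dl the duplicated lines. As a minimal sketch for reproducing comparable numbers locally, assuming the file is saved as conftest.py and using the radon package (an assumption; the report does not name its analysis tool):

# Illustrative only: radon is an assumption, not necessarily the tool that produced the report above.
from radon.complexity import cc_visit
from radon.raw import analyze

with open('conftest.py') as f:   # hypothetical path to the reviewed file
    source = f.read()

raw = analyze(source)            # raw counts: loc, lloc, sloc, comments, blank, ...
print('loc:', raw.loc, 'sloc:', raw.sloc)

for block in cc_visit(source):   # one entry per function/method in the module
    if block.name == 'tabular_operators':
        print(block.name, 'cc:', block.complexity)
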
import os
import pytest


my_dir = os.path.dirname(os.path.realpath(__file__))


####### Files and folders
@pytest.fixture
def tests_root_dir():
    return my_dir


@pytest.fixture
def tests_data_root(tests_root_dir):
    return os.path.join(tests_root_dir, 'dts')


# Test data
@pytest.fixture
def sample_json(tests_data_root):
    return os.path.join(tests_data_root, 'sample-data.jsonlines')


@pytest.fixture
def sample_collapsed_json(tests_data_root):
    return os.path.join(tests_data_root, 'sample-data-collapsed.jsonlines')


@pytest.fixture()
def test_json_data(sample_json):
    return {
        'file_path': sample_json,
        'nb_lines': 100,
        'attributes': {'flavors', 'name', 'medical', 'description', 'image_urls', 'parents', 'negatives',
                       'grow_info', '_id', 'type', 'image_paths', 'effects'},
    }


@pytest.fixture
def somagic():
    from so_magic import init_so_magic
    _ = init_so_magic()
    return _


@pytest.fixture
def data_manager():
    def getter():
        from so_magic.data import init_data_manager
        from so_magic.data.backend import init_engine

        data_manager = init_data_manager(init_engine(engine_type='pd'))

        datapoints_fact = data_manager.engine.backend.datapoints_factory
        cmd_fact = data_manager.engine.backend.command_factory

        # test 1
        from so_magic.data.datapoints.datapoints import DatapointsFactory
        from so_magic.data.backend.engine_command_factory import MagicCommandFactory

        assert isinstance(datapoints_fact, DatapointsFactory)
        assert isinstance(cmd_fact, MagicCommandFactory)

        # each subject must keep its own observers list (no sharing between factories)
        subjects = [datapoints_fact.subject, cmd_fact.subject, data_manager.phi_class.subject]
        assert len(set([id(x._observers) for x in subjects])) == len(subjects)

        assert datapoints_fact.subject._observers[0] == data_manager.engine.datapoints_manager
        assert cmd_fact.subject._observers[0] == data_manager.commands_manager.command.accumulator
        assert data_manager.phi_class.subject._observers[0] == data_manager.built_phis

        print(f"DTP FCT OBS: [{', '.join(str(_) for _ in datapoints_fact.subject._observers)}]")
        print(f"CMD FCT OBS: [{', '.join(str(_) for _ in cmd_fact.subject._observers)}]")
        print(f"PHIFUNC class OBS: [{', '.join(str(_) for _ in data_manager.phi_class.subject._observers)}]")
        assert all([len(x._observers) == 1 for x in subjects])
        return data_manager

    return getter


@pytest.fixture
def read_observations():
    """Read a json lines formatted file and create the observations object (see Datapoints class)."""
    def load_data(so_master, json_lines_formatted_file_path):
        """Create the observations object for a Datapoints instance, given a data file.

        Args:
            so_master (so_magic.so_master.SoMaster): an instance of SoMaster
            json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
        """
        cmd = so_master.command.observations_command
        cmd.args = [json_lines_formatted_file_path]
        cmd.execute()
    return load_data


@pytest.fixture
def test_datapoints(read_observations, sample_collapsed_json, somagic):
    """Read the designated json lines 'test file' (which contains the 'test observations') as a Datapoints instance."""
    read_observations(somagic, sample_collapsed_json)
    return somagic.datapoints


@pytest.fixture
def test_dataset(somagic, read_observations, sample_collapsed_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed."""
    read_observations(somagic, sample_collapsed_json)

    ATTRS2 = ['type_hybrid', 'type_indica', 'type_sativa']
    from functools import reduce
    UNIQUE_FLAVORS = reduce(lambda i, j: set(i).union(set(j)),
                            [_ for _ in somagic._data_manager.datapoints.observations['flavors'] if _ is not None])

    cmd = somagic._data_manager.command.select_variables_command
    cmd.args = [[
        # current limitations:
        # 1. client code has to know the number of distinct values for the nominal variable 'type'
        # 2. client code has to provide the column names that will result after encoding the 'type' variable
        {'variable': 'type', 'columns': ATTRS2},
        # current limitations:
        # 1. client code has to know the number of distinct values for the nominal variable 'flavors'
        # 2. client code has to provide the column names that will result after encoding the 'flavors' variable
        {'variable': 'flavors', 'columns': list(UNIQUE_FLAVORS)}]]
    cmd.execute()

    cmd = somagic._data_manager.command.one_hot_encoding_command
    cmd.args = [somagic._data_manager.datapoints, 'type']
    cmd.execute()

    cmd = somagic._data_manager.command.one_hot_encoding_list_command
    cmd.args = [somagic._data_manager.datapoints, 'flavors']
    cmd.execute()

    import numpy as np

    setattr(somagic.dataset, 'feature_vectors',
            np.array(somagic._data_manager.datapoints.observations[ATTRS2 + list(UNIQUE_FLAVORS)]))

    return somagic.dataset


@pytest.fixture
def built_in_backends():
    from so_magic.data.backend.panda_handling.df_backend import magic_backends
    engine_backends = magic_backends()
    return engine_backends


@pytest.fixture
def tabular_operators(built_in_backends):
    operators = {
        'retriever': {
            'class': built_in_backends.backend_interfaces['retriever']['class_registry'].subclasses['pd'],
            'interface': {
                'column': '(identifier, data)',
                'row': '(identifier, data)',
                'nb_columns': '(data)',
                'nb_rows': '(data)',
                'get_numerical_attributes': '(data)',
            },
        },
        'iterator': {
            'class': built_in_backends.backend_interfaces['iterator']['class_registry'].subclasses['pd'],
            'interface': {
                'columnnames': '(data)',
                'itercolumns': '(data)',
                'iterrows': '(data)',
            },
        },
        'mutator': {
            'class': built_in_backends.backend_interfaces['mutator']['class_registry'].subclasses['pd'],
            'interface': {
                'add_column': '(datapoints, values, new_attribute, **kwargs)',
            },
        },
    }
    return {
        'operators': operators,
        'reverse_dict': {operator_dict['class']: key for key, operator_dict in operators.items()},
        'get_nb_args': lambda operator_interface_name, method_name: len(
            operators[operator_interface_name]['interface'][method_name].replace(', **kwargs', '').split(',')),
        # operator_name_2_required_methods
        'required_methods': iter(((operator_interface_name, v['interface'].keys())
                                  for operator_interface_name, v in operators.items()))
    }

Note: the analysis tool attaches two "Comprehensibility Best Practice" annotations to the 'required_methods' entry, reporting that the variables v and operator_interface_name "do not seem to be defined". Both are false positives: the names are bound by the for clause of the generator expression passed to iter().
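
The dictionary returned by tabular_operators doubles as a small contract for downstream tests. A minimal usage sketch (a hypothetical test, not part of the reviewed file) exercising the 'required_methods' iterator, the 'reverse_dict' mapping and the 'get_nb_args' helper:

# Hypothetical test, shown only to illustrate how the fixture's return value can be consumed.
def test_tabular_operators_contract(tabular_operators):
    # 'required_methods' yields (operator name, declared method names) pairs; it is a
    # one-shot iterator, so it can only be walked once per fixture instance.
    for operator_name, method_names in tabular_operators['required_methods']:
        operator_class = tabular_operators['operators'][operator_name]['class']
        # 'reverse_dict' maps each concrete 'pd' class back to its operator name
        assert tabular_operators['reverse_dict'][operator_class] == operator_name
        assert len(method_names) > 0
    # 'get_nb_args' counts the parameter names in the documented signature strings, ignoring '**kwargs':
    # '(identifier, data)' -> 2, '(datapoints, values, new_attribute, **kwargs)' -> 3
    assert tabular_operators['get_nb_args']('retriever', 'column') == 2
    assert tabular_operators['get_nb_args']('mutator', 'add_column') == 3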