Passed
Pull Request — dev (#32)
by Konstantinos
05:29 queued 03:32
created

conftest.sample_json()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
import os
2
import pytest
3
4
5
# Directory containing this conftest module (path resolved through os.path.realpath).
my_dir = os.path.split(os.path.realpath(__file__))[0]
6
7
####### Files and folders
8
@pytest.fixture
def tests_root_dir():
    """Provide the directory path held in the module-level ``my_dir``."""
    root_directory = my_dir
    return root_directory
11
12
@pytest.fixture
def tests_data_root(tests_root_dir):
    """Provide the path of the 'dts' directory under the tests root directory."""
    data_directory = os.path.join(tests_root_dir, 'dts')
    return data_directory
15
16
# Test data
17
@pytest.fixture
def sample_json(tests_data_root):
    """Provide the path of 'sample-data.jsonlines' inside the tests data directory."""
    file_name = 'sample-data.jsonlines'
    return os.path.join(tests_data_root, file_name)
20
21
@pytest.fixture
def sample_collaped_json(tests_data_root):
    """Provide the path of 'sample-data-collapsed.jsonlines' inside the tests data directory."""
    file_name = 'sample-data-collapsed.jsonlines'
    return os.path.join(tests_data_root, file_name)
24
25
26
@pytest.fixture()
def test_json_data(sample_json):
    """Provide a dict describing the sample json lines file.

    Keys:
        'file_path': the path given by the ``sample_json`` fixture
        'nb_lines': the integer 100 (presumably the number of lines in the file)
        'attributes': a set of attribute names
    """
    attribute_names = {
        'flavors', 'name', 'medical', 'description', 'image_urls', 'parents',
        'negatives', 'grow_info', '_id', 'type', 'image_paths', 'effects',
    }
    description = {
        'file_path': sample_json,
        'nb_lines': 100,
        'attributes': attribute_names,
    }
    return description
33
34
35
@pytest.fixture
def somagic():
    """Provide the object returned by ``so_magic.init_so_magic()``."""
    # Function-scope import: so_magic is imported when the fixture runs, not when conftest loads.
    from so_magic import init_so_magic
    return init_so_magic()
40
41
42
@pytest.fixture
def data_manager():
    """Provide a zero-argument factory that builds and sanity-checks a data manager.

    Each call of the returned function creates a data manager on top of an engine of
    type 'pd', asserts the observer wiring of the backend factories and of the phi
    class, prints the registered observers and returns the data manager.
    """
    def build_manager():
        from so_magic.data import init_data_manager
        from so_magic.data.backend import init_engine

        engine = init_engine(engine_type='pd')
        manager = init_data_manager(engine)

        datapoints_factory = manager.engine.backend.datapoints_factory
        command_factory = manager.engine.backend.command_factory

        # the factories must be instances of the expected classes
        from so_magic.data.datapoints.datapoints import DatapointsFactory
        from so_magic.data.backend.engine_command_factory import MagicCommandFactory

        assert isinstance(datapoints_factory, DatapointsFactory)
        assert isinstance(command_factory, MagicCommandFactory)

        datapoints_subject = datapoints_factory.subject
        phi_subject = manager.phi_class.subject
        subjects = [datapoints_subject, command_factory, phi_subject]

        # no two subjects may share the same observers container object
        assert len({id(subject._observers) for subject in subjects}) == len(subjects)

        # the first observer of each subject is the matching component of the manager
        assert datapoints_subject._observers[0] == manager.engine.datapoints_manager
        assert command_factory._observers[0] == manager.commands_manager.command.accumulator
        assert phi_subject._observers[0] == manager.built_phis

        def joined(observers):
            return ', '.join(str(observer) for observer in observers)

        print(f"DTP FCT OBS: [{joined(datapoints_subject._observers)}]")
        print(f"CMD FCT OBS: [{joined(command_factory._observers)}]")
        print(f"PHIFUNC class OBS: [{joined(phi_subject._observers)}]")

        # every subject has exactly one observer
        assert all(len(subject._observers) == 1 for subject in subjects)
        return manager

    return build_manager
74
75
76
@pytest.fixture
def test_data_manager(data_manager):
    """Provide the data manager obtained by calling the ``data_manager`` factory fixture."""
    manager = data_manager()
    return manager
79
80
81
@pytest.fixture
def load_test_data(test_data_manager, sample_json):
    """Provide a zero-argument callable that runs the observations command on the sample file.

    When called, the observations command of ``test_data_manager`` gets the
    ``sample_json`` path as its only argument and is executed. The callable returns None.
    """
    def load_sample():
        observations_cmd = test_data_manager.command.observations_command
        observations_cmd.args = [sample_json]
        observations_cmd.execute()

    return load_sample
88
89
90
@pytest.fixture
def read_observations():
    """Provide a function that reads a json lines formatted file into observations (see Datapoints class)."""
    def read_file(so_master, json_lines_formatted_file_path):
        """Run the observations command of the given object on a data file.

        Args:
            so_master (so_magic.so_master.SoMaster): an instance of SoMaster
            json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
        """
        observations_cmd = so_master.command.observations_command
        observations_cmd.args = [json_lines_formatted_file_path]
        observations_cmd.execute()

    return read_file
104
105
106
@pytest.fixture
def test_datapoints(read_observations, sample_collaped_json, somagic):
    """Provide ``somagic.datapoints`` after reading the collapsed sample json lines file."""
    # the observations must be read first; the datapoints are then taken from the same object
    read_observations(somagic, sample_collaped_json)
    datapoints = somagic.datapoints
    return datapoints
111
112
113
@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    Reads the collapsed sample observations, selects the 'type' and 'flavors'
    variables, one-hot encodes both and stores the encoded columns as a numpy array
    in the 'feature_vectors' attribute of ``somagic.dataset``.

    Returns:
        ``somagic.dataset`` with its ``feature_vectors`` attribute set.
    """
    read_observations(somagic, sample_collaped_json)
    data_manager = somagic._data_manager

    type_columns = ['type_hybrid', 'type_indica', 'type_sativa']

    # Union of all non-missing 'flavors' entries. Unlike a bare reduce(), set().union(*...)
    # also handles zero entries (empty result instead of TypeError) and a single entry
    # (deduplicated instead of returned as-is). Sorting makes the column order identical
    # across interpreter runs (set iteration order of strings is not).
    # NOTE(review): sorting assumes the flavor values are mutually comparable (e.g. all str).
    flavor_columns = sorted(set().union(
        *(flavors for flavors in data_manager.datapoints.observations['flavors'] if flavors is not None)
    ))

    # current limitations (apply to both the 'type' and the 'flavors' variable):
    # 1. client code has to know the number of distinct values of each nominal variable
    # 2. client code has to provide the column names that will result after encoding each variable
    cmd = data_manager.command.select_variables_command
    cmd.args = [[
        {'variable': 'type', 'columns': type_columns},
        {'variable': 'flavors', 'columns': flavor_columns},
    ]]
    cmd.execute()

    cmd = data_manager.command.one_hot_encoding_command
    cmd.args = [data_manager.datapoints, 'type']
    cmd.execute()

    cmd = data_manager.command.one_hot_encoding_list_command
    cmd.args = [data_manager.datapoints, 'flavors']
    cmd.execute()

    import numpy as np

    # feature vectors: the encoded 'type' columns followed by the encoded 'flavors' columns
    somagic.dataset.feature_vectors = np.array(
        data_manager.datapoints.observations[type_columns + flavor_columns])

    return somagic.dataset
152
153
154
@pytest.fixture
def built_in_backends():
    """Provide the object returned by ``magic_backends()`` of the pandas df_backend module."""
    from so_magic.data.backend.panda_handling.df_backend import magic_backends
    return magic_backends()
159