1 | import os |
||
2 | import pytest |
||
3 | |||
4 | |||
# Directory containing this file, computed from the symlink-resolved path of __file__.
# The 'tests_root_dir' fixture returns this value.
my_dir = os.path.dirname(os.path.realpath(__file__))
||
6 | |||
7 | ####### Files and folders |
||
@pytest.fixture
def tests_root_dir():
    """Return the module-level ``my_dir``: the directory that contains this file."""
    root_dir = my_dir
    return root_dir
||
11 | |||
@pytest.fixture
def tests_data_root(tests_root_dir):
    """Return the path of the 'dts' folder located directly under the tests root directory."""
    data_folder_name = 'dts'
    return os.path.join(tests_root_dir, data_folder_name)
||
15 | |||
16 | # Test data |
||
@pytest.fixture
def sample_json(tests_data_root):
    """Return the path of the 'sample-data.jsonlines' file inside the test data folder."""
    file_name = 'sample-data.jsonlines'
    return os.path.join(tests_data_root, file_name)
||
20 | |||
@pytest.fixture
def sample_collaped_json(tests_data_root):
    """Return the path of the 'sample-data-collapsed.jsonlines' file inside the test data folder."""
    file_name = 'sample-data-collapsed.jsonlines'
    return os.path.join(tests_data_root, file_name)
||
24 | |||
25 | |||
@pytest.fixture()
def test_json_data(sample_json):
    """Describe the sample json lines file with a dict of 'file_path', 'nb_lines' and 'attributes'.

    'nb_lines' and 'attributes' are hard-coded in this fixture; they are not read from the file.
    """
    attribute_names = {
        'flavors',
        'name',
        'medical',
        'description',
        'image_urls',
        'parents',
        'negatives',
        'grow_info',
        '_id',
        'type',
        'image_paths',
        'effects',
    }
    return dict(
        file_path=sample_json,
        nb_lines=100,
        attributes=attribute_names,
    )
||
33 | |||
34 | |||
@pytest.fixture
def somagic():
    """Return the object produced by a call to ``so_magic.init_so_magic()``."""
    # imported inside the fixture, so 'so_magic' is only loaded when the fixture is requested
    from so_magic import init_so_magic
    return init_so_magic()
||
40 | |||
41 | |||
@pytest.fixture
def data_manager():
    """Return a zero-argument factory; every call builds a data manager on top of a 'pd' engine."""
    def create_data_manager():
        from so_magic.data import init_data_manager
        from so_magic.data.backend import init_engine
        engine = init_engine(engine_type='pd')
        return init_data_manager(engine)
    return create_data_manager
||
50 | |||
51 | |||
@pytest.fixture
def read_observations():
    """Return a callable that reads a json lines formatted file into observations (see Datapoints class)."""
    def load_data(so_master, json_lines_formatted_file_path):
        """Run the 'observations' command of the given SoMaster on a data file.

        Args:
            so_master (so_magic.so_master.SoMaster): an instance of SoMaster
            json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
        """
        observations_command = so_master.command.observations_command
        # the file path is the command's only argument
        observations_command.args = [json_lines_formatted_file_path]
        observations_command.execute()
    return load_data
||
66 | |||
67 | |||
@pytest.fixture
def test_datapoints(read_observations, sample_collaped_json, somagic):
    """Load the collapsed json lines 'test file' and return the resulting ``somagic.datapoints``."""
    observations_file = sample_collaped_json
    read_observations(somagic, observations_file)
    datapoints = somagic.datapoints
    return datapoints
||
73 | |||
74 | |||
@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    Loads the collapsed sample observations, runs the select-variables command and the one-hot encoding
    commands for the 'type' and 'flavors' variables, then stores the encoded columns as the
    ``feature_vectors`` attribute of ``somagic.dataset``.

    Returns:
        tuple: ``(somagic.dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before)``
        where ``unique_flavors`` is a set, ``max_flavors_per_datapoint`` is the length of the longest
        'flavors' list and ``nb_columns_before`` is the number of observation columns counted right before
        the 'flavors' variable gets encoded.
    """
    import numpy as np

    def observations():
        # Looked up on every call rather than cached, so the current object is always used
        # (NOTE(review): unclear whether the commands below replace it — confirm).
        return somagic._data_manager.datapoints.observations

    read_observations(somagic, sample_collaped_json)

    type_values = ['hybrid', 'indica', 'sativa']
    type_columns = [f'type_{value}' for value in type_values]

    # set().union(*iterables) always yields a set; a reduce() without initializer would hand back the
    # raw list when only one observation has flavors and raise TypeError when none has.
    unique_flavors = set().union(*(flavors for flavors in observations()['flavors'] if flavors is not None))
    # materialized once, so both usages below see the columns in the same order
    flavor_columns = list(unique_flavors)

    # current limitations (they apply to both the 'type' and the 'flavors' nominal variables):
    #  1. client code has to know the number of distinct values of the variable
    #  2. client code has to provide the column names that will result after encoding the variable
    cmd = somagic._data_manager.command.select_variables_command
    cmd.args = [[
        {'variable': 'type', 'columns': type_columns},
        {'variable': 'flavors', 'columns': flavor_columns},
    ]]
    cmd.execute()

    cmd = somagic._data_manager.command.one_hot_encoding_command
    cmd.args = [somagic._data_manager.datapoints, 'type']
    cmd.execute()

    # every 'flavors' value must be either a list or None, and both kinds must be present
    assert {type(flavors) for flavors in observations()['flavors']} == {list, type(None)}

    nb_columns_before = len(observations().columns)

    cmd = somagic._data_manager.command.one_hot_encoding_list_command
    cmd.args = [somagic._data_manager.datapoints, 'flavors']
    cmd.execute()

    # encoding 'flavors' is expected to add exactly one column per unique flavor
    assert nb_columns_before + len(unique_flavors) == len(observations().columns)

    somagic.dataset.feature_vectors = np.array(observations()[type_columns + flavor_columns])

    max_flavors_per_datapoint = max(
        len(flavors) for flavors in observations()['flavors'] if isinstance(flavors, list))
    return somagic.dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before
||
122 | |||
123 | |||
@pytest.fixture
def built_in_backends():
    """Return the object produced by ``magic_backends()`` of the 'panda_handling.df_backend' module."""
    from so_magic.data.backend.panda_handling.df_backend import magic_backends
    return magic_backends()
||
129 | |||
130 | |||
@pytest.fixture
def tabular_operators(built_in_backends):
    """Describe the 'pd' retriever, iterator and mutator operators registered in the built-in backends.

    Returns:
        dict: with keys
            'operators': per operator name, its 'pd' class and an 'interface' dict mapping each method
                name to a string listing the method's arguments
            'reverse_dict': mapping of operator class to operator name
            'get_nb_args': callable (operator_interface_name, method_name) -> number of comma separated
                arguments in the method's string, not counting a trailing ', **kwargs'
            'required_methods': one-shot generator of (operator name, method names) pairs
    """
    def pd_class(interface_name):
        # the class registered under the 'pd' key for the given backend interface
        return built_in_backends.backend_interfaces[interface_name]['class_registry'].subclasses['pd']

    retriever = {
        'class': pd_class('retriever'),
        'interface': {
            'column': '(identifier, data)',
            'row': '(identifier, data)',
            'nb_columns': '(data)',
            'nb_rows': '(data)',
            'get_numerical_attributes': '(data)',
        },
    }
    iterator = {
        'class': pd_class('iterator'),
        'interface': {
            'columnnames': '(data)',
            'itercolumns': '(data)',
            'iterrows': '(data)',
        },
    }
    mutator = {
        'class': pd_class('mutator'),
        'interface': {
            'add_column': '(datapoints, values, new_attribute, **kwargs)',
        },
    }
    operators = {'retriever': retriever, 'iterator': iterator, 'mutator': mutator}

    def count_arguments(operator_interface_name, method_name):
        signature = operators[operator_interface_name]['interface'][method_name]
        without_kwargs = signature.replace(', **kwargs', '')
        return len(without_kwargs.split(','))

    # operator_name_2_required_methods
    required_methods = ((name, spec['interface'].keys()) for name, spec in operators.items())

    return {
        'operators': operators,
        'reverse_dict': {spec['class']: name for name, spec in operators.items()},
        'get_nb_args': count_arguments,
        'required_methods': required_methods,
    }
||
167 | |||
168 | |||
@pytest.fixture
def assert_different_objects():
    """Return a checker asserting that a sized collection holds as many distinct ``id()`` values as items."""
    def _assert_different_objects(objects):
        distinct_ids = {id(item) for item in objects}
        assert len(distinct_ids) == len(objects)
    return _assert_different_objects
||
174 |