| 1 |  |  | import os | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | import pytest | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | my_dir = os.path.dirname(os.path.realpath(__file__)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 7 |  |  | ####### Files and folders | 
            
                                                                        
                            
            
                                    
            
            
                | 8 |  |  | @pytest.fixture | 
            
                                                                        
                            
            
                                    
            
            
                | 9 |  |  | def tests_root_dir(): | 
            
                                                                        
                            
            
                                    
            
            
                | 10 |  |  |     return my_dir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | def tests_data_root(tests_root_dir): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |     return os.path.join(tests_root_dir, 'dts') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | # Test data | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | def sample_json(tests_data_root): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |     return os.path.join(tests_data_root, 'sample-data.jsonlines') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  | def sample_collaped_json(tests_data_root): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     return os.path.join(tests_data_root, 'sample-data-collapsed.jsonlines') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  | @pytest.fixture() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  | def test_json_data(sample_json): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     return { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |         'file_path': sample_json, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |         'nb_lines': 100, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |         'attributes': {'flavors', 'name', 'medical', 'description', 'image_urls', 'parents', 'negatives', 'grow_info', '_id', 'type', 'image_paths', 'effects'}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  | def somagic(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |     from so_magic import init_so_magic | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     _ = init_so_magic() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     return _ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  | def data_manager(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     def getter(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |         from so_magic.data import init_data_manager | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         from so_magic.data.backend import init_engine | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         data_manager = init_data_manager(init_engine(engine_type='pd')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         return data_manager | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     return getter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  | def read_observations(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |     """Read a json lines formatted file and create the observations object (see Datapoints class).""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |     def load_data(so_master, json_lines_formatted_file_path): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         """Create the observations object for a Datapoints instance, given a data file. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |             so_master (so_magic.so_master.SoMaster): an instance of SoMaster | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |             json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         cmd = so_master.command.observations_command | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |         cmd.args = [json_lines_formatted_file_path] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |         cmd.execute() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |     return load_data | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  | def test_datapoints(read_observations, sample_collaped_json, somagic): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |     """Read the designated json lines 'test file' (which contains the 'test observations') as a Datapoints instance.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |     read_observations(somagic, sample_collaped_json) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |     return somagic.datapoints | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  | def test_dataset(somagic, read_observations, sample_collaped_json): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |     """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |     read_observations(somagic, sample_collaped_json) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |     type_values = ['hybrid', 'indica', 'sativa'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |     ATTRS2 = [f'type_{x}' for x in type_values] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |     from functools import reduce | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |     UNIQUE_FLAVORS = reduce(lambda i, j: set(i).union(set(j)), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |                             [_ for _ in somagic._data_manager.datapoints.observations['flavors'] if _ is not None]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |     cmd = somagic._data_manager.command.select_variables_command | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |     # current limitations: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |     # 1. client code has to know the number of distinct values for the nominal variable 'type' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |     # 2. client code has to provide the column names that will result after encoding the 'type' variable | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |     cmd.args = [[ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         # current limitations: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         # 1. client code has to know the number of distinct values for the nominal variable 'type' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         # 2. client code has to provide the column names that will result after encoding the 'type' variable | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         {'variable': 'type', 'columns': ATTRS2}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         # current limitations: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         # 1. client code has to know the number of distinct values for the nominal variable 'flavors' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         # 2. client code has to provide the column names that will result after encoding the 'flavors' variable | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         {'variable': 'flavors', 'columns': list(UNIQUE_FLAVORS)}]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |     cmd.execute() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |     cmd = somagic._data_manager.command.one_hot_encoding_command | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |     cmd.args = [somagic._data_manager.datapoints, 'type'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |     cmd.execute() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |     assert set([type(x) for x in somagic._data_manager.datapoints.observations['flavors']]) == {list, type(None)} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |     nb_columns_before = len(somagic._data_manager.datapoints.observations.columns) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |     cmd = somagic._data_manager.command.one_hot_encoding_list_command | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |     cmd.args = [somagic._data_manager.datapoints, 'flavors'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |     cmd.execute() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |     assert nb_columns_before + len(UNIQUE_FLAVORS) == len(somagic._data_manager.datapoints.observations.columns) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |     import numpy as np | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |     setattr(somagic.dataset, 'feature_vectors', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |             np.array(somagic._data_manager.datapoints.observations[ATTRS2 + list(UNIQUE_FLAVORS)])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |     MAX_FLAVORS_PER_DAATPOINT = max( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |         [len(x) for x in [_ for _ in somagic._data_manager.datapoints.observations['flavors'] if type(_) is list]]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |     return somagic.dataset, type_values, UNIQUE_FLAVORS, MAX_FLAVORS_PER_DAATPOINT, nb_columns_before | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  | def built_in_backends(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |     from so_magic.data.backend.panda_handling.df_backend import magic_backends | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |     engine_backends = magic_backends() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |     return engine_backends | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  | def tabular_operators(built_in_backends): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |     operators = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |         'retriever': { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |             'class': built_in_backends.backend_interfaces['retriever']['class_registry'].subclasses['pd'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |             'interface': { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |                 'column': '(identifier, data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |                 'row': '(identifier, data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |                 'nb_columns': '(data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |                 'nb_rows': '(data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |                 'get_numerical_attributes': '(data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |         }, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |         'iterator': { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |             'class': built_in_backends.backend_interfaces['iterator']['class_registry'].subclasses['pd'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |             'interface': { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |                 'columnnames': '(data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |                 'itercolumns': '(data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |                 'iterrows': '(data)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |             }, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |         }, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |         'mutator': { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |             'class': built_in_backends.backend_interfaces['mutator']['class_registry'].subclasses['pd'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |             'interface': { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |                 'add_column': '(datapoints, values, new_attribute, **kwargs)', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |             }, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |         }, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |     return { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |         'operators': operators, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |         'reverse_dict': {operator_dict['class']: key for key, operator_dict in operators.items()}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |         'get_nb_args': lambda operator_interface_name, method_name: len(operators[operator_interface_name]['interface'][method_name].replace(', **kwargs', '').split(',')), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |         # operator_name_2_required_methods | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |         'required_methods': iter(((operator_interface_name, v['interface'].keys()) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |                                   for operator_interface_name, v in operators.items())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  | @pytest.fixture | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  | def assert_different_objects(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |     def _assert_different_objects(objects): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |         assert len(set([id(x) for x in objects])) == len(objects) | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 173 |  |  |     return _assert_different_objects | 
            
                                                        
            
                                    
            
            
                | 174 |  |  |  |