conftest.test_dataset() - Code Metrics - Inspection of "Merge pull request #33 from boromir674/release" - boromir674/so-magic - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 96da92...a1b572 )

by Konstantinos

created 2021-06-24 15:13 UTC

conftest.test_dataset() A

↳ Parent: conftest

Complexity

Conditions

Size

Total Lines	47
Code Lines	28

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	28
nop	3
dl	0
loc	47
rs	9.208
c	0
b	0
f	0

import os
import pytest


# Directory that contains this conftest module (symlinks resolved by realpath).
_conftest_file = os.path.realpath(__file__)
my_dir = os.path.dirname(_conftest_file)

####### Files and folders
@pytest.fixture
def tests_root_dir() -> str:
    """Return the directory path held in the module-level ``my_dir``."""
    return my_dir

@pytest.fixture
def tests_data_root(tests_root_dir):
    """Return the path of the ``dts`` folder joined onto ``tests_root_dir``."""
    data_folder_name = 'dts'
    return os.path.join(tests_root_dir, data_folder_name)

# Test data
@pytest.fixture
def sample_json(tests_data_root):
    """Return the path of ``sample-data.jsonlines`` joined onto ``tests_data_root``."""
    file_name = 'sample-data.jsonlines'
    return os.path.join(tests_data_root, file_name)

@pytest.fixture
def sample_collaped_json(tests_data_root):
    """Return the path of ``sample-data-collapsed.jsonlines`` joined onto ``tests_data_root``."""
    file_name = 'sample-data-collapsed.jsonlines'
    return os.path.join(tests_data_root, file_name)


@pytest.fixture()
def test_json_data(sample_json):
    """Return a dict describing the sample json-lines file.

    Keys:
        file_path: the path supplied by the ``sample_json`` fixture
        nb_lines: the integer 100 (presumably the expected line count of the file; verify against the data)
        attributes: a set of attribute names (presumably the keys found in each record; verify against the data)
    """
    attribute_names = {
        'flavors', 'name', 'medical', 'description', 'image_urls', 'parents',
        'negatives', 'grow_info', '_id', 'type', 'image_paths', 'effects',
    }
    description = {
        'file_path': sample_json,
        'nb_lines': 100,
        'attributes': attribute_names,
    }
    return description


@pytest.fixture
def somagic():
    """Return the object produced by calling ``so_magic.init_so_magic()``."""
    # Imported at fixture setup time rather than at module import time.
    from so_magic import init_so_magic
    return init_so_magic()


@pytest.fixture
def data_manager():
    """Return a zero-argument factory that builds a data manager on top of a 'pd' engine.

    Every call of the returned factory invokes ``init_engine`` and ``init_data_manager`` again.
    """
    def getter():
        # so_magic is imported only when the factory is actually called.
        from so_magic.data import init_data_manager
        from so_magic.data.backend import init_engine
        return init_data_manager(init_engine(engine_type='pd'))
    return getter


@pytest.fixture
def read_observations():
    """Return a callable that runs an so-magic instance's observations command on a json lines file."""
    def load_data(so_master, json_lines_formatted_file_path):
        """Execute the observations command with the given data file as its single argument.

        Args:
            so_master (so_magic.so_master.SoMaster): an instance of SoMaster
            json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
        """
        observations_command = so_master.command.observations_command
        observations_command.args = [json_lines_formatted_file_path]
        observations_command.execute()
    return load_data


@pytest.fixture
def test_datapoints(read_observations, sample_collaped_json, somagic):
    """Load the collapsed sample json lines file into ``somagic`` and return its ``datapoints`` attribute."""
    read_observations(somagic, sample_collaped_json)
    datapoints = somagic.datapoints
    return datapoints


@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    Loads the collapsed sample observations, runs the select-variables command for the 'type' and
    'flavors' variables, runs the one-hot encoding commands for both, and stores the resulting columns
    as a numpy array under ``somagic.dataset.feature_vectors``.

    Returns:
        tuple: ``(dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before)`` where
            dataset is ``somagic.dataset``,
            type_values is the list ``['hybrid', 'indica', 'sativa']``,
            unique_flavors is the set of all values found in the non-None 'flavors' entries,
            max_flavors_per_datapoint is the length of the longest 'flavors' list,
            nb_columns_before is the number of observation columns right before 'flavors' got encoded.
    """
    import numpy as np

    def observations():
        # Looked up afresh on every use, because the executed commands may swap the underlying objects.
        return somagic._data_manager.datapoints.observations

    read_observations(somagic, sample_collaped_json)

    type_values = ['hybrid', 'indica', 'sativa']
    type_columns = [f'type_{x}' for x in type_values]

    # set().union(*lists) yields a set for any number of lists. A reduce() without initializer
    # would hand back the raw list (duplicates included) for a single list and raise for none.
    unique_flavors = set().union(*(flavors for flavors in observations()['flavors'] if flavors is not None))
    # Materialized once, so the same column order is used for selection and for the feature vectors.
    flavor_columns = list(unique_flavors)

    # current limitations (they apply to both the 'type' and the 'flavors' nominal variables):
    # 1. client code has to know the number of distinct values of the nominal variable
    # 2. client code has to provide the column names that will result after encoding the variable
    cmd = somagic._data_manager.command.select_variables_command
    cmd.args = [[
        {'variable': 'type', 'columns': type_columns},
        {'variable': 'flavors', 'columns': flavor_columns},
    ]]
    cmd.execute()

    cmd = somagic._data_manager.command.one_hot_encoding_command
    cmd.args = [somagic._data_manager.datapoints, 'type']
    cmd.execute()

    # Sanity check on the test data: every 'flavors' entry is exactly a list or None.
    assert {type(x) for x in observations()['flavors']} == {list, type(None)}

    nb_columns_before = len(observations().columns)

    cmd = somagic._data_manager.command.one_hot_encoding_list_command
    cmd.args = [somagic._data_manager.datapoints, 'flavors']
    cmd.execute()

    # Encoding 'flavors' must have added exactly one column per distinct flavor.
    assert nb_columns_before + len(unique_flavors) == len(observations().columns)

    somagic.dataset.feature_vectors = np.array(observations()[type_columns + flavor_columns])

    max_flavors_per_datapoint = max(len(x) for x in observations()['flavors'] if isinstance(x, list))
    return somagic.dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before


@pytest.fixture
def built_in_backends():
    """Return the result of calling ``magic_backends()`` from the pandas dataframe backend module."""
    from so_magic.data.backend.panda_handling.df_backend import magic_backends
    return magic_backends()


@pytest.fixture
def tabular_operators(built_in_backends):
    """Describe the 'pd' retriever, iterator and mutator operators and the methods listed for each.

    Returns:
        dict: with keys
            operators: operator name -> {'class': registered 'pd' subclass, 'interface': method name -> signature string}
            reverse_dict: registered 'pd' subclass -> operator name
            get_nb_args: callable (operator_interface_name, method_name) -> number of comma-separated
                entries in the signature string, after dropping a trailing ', **kwargs'
            required_methods: one-shot generator of (operator name, method names view) pairs
    """
    def registered_pd_class(interface_name):
        # The class registered under the 'pd' key of the given backend interface.
        return built_in_backends.backend_interfaces[interface_name]['class_registry'].subclasses['pd']

    data_only = '(data)'
    identifier_and_data = '(identifier, data)'

    operators = {
        'retriever': {
            'class': registered_pd_class('retriever'),
            'interface': {
                'column': identifier_and_data,
                'row': identifier_and_data,
                'nb_columns': data_only,
                'nb_rows': data_only,
                'get_numerical_attributes': data_only,
            },
        },
        'iterator': {
            'class': registered_pd_class('iterator'),
            'interface': {
                'columnnames': data_only,
                'itercolumns': data_only,
                'iterrows': data_only,
            },
        },
        'mutator': {
            'class': registered_pd_class('mutator'),
            'interface': {
                'add_column': '(datapoints, values, new_attribute, **kwargs)',
            },
        },
    }

    def get_nb_args(operator_interface_name, method_name):
        signature = operators[operator_interface_name]['interface'][method_name]
        positional_part = signature.replace(', **kwargs', '')
        return len(positional_part.split(','))

    class_to_operator_name = {}
    for operator_name, operator_dict in operators.items():
        class_to_operator_name[operator_dict['class']] = operator_name

    # operator_name_2_required_methods; a generator, so it can be consumed only once
    required_methods = (
        (operator_name, operator_dict['interface'].keys())
        for operator_name, operator_dict in operators.items()
    )

    return {
        'operators': operators,
        'reverse_dict': class_to_operator_name,
        'get_nb_args': get_nb_args,
        'required_methods': required_methods,
    }


@pytest.fixture
def assert_different_objects():
    """Return a callable asserting that the given objects all have distinct identities."""
    def _assert_different_objects(objects):
        # As many distinct id() values as objects means no object appears twice.
        distinct_ids = {id(obj) for obj in objects}
        assert len(distinct_ids) == len(objects)
    return _assert_different_objects


1			import os
2			import pytest
3
4
5			my_dir = os.path.dirname(os.path.realpath(__file__))
6
7			####### Files and folders
8			@pytest.fixture
9			def tests_root_dir():
10			return my_dir
11
12			@pytest.fixture
13			def tests_data_root(tests_root_dir):
14			return os.path.join(tests_root_dir, 'dts')
15
16			# Test data
17			@pytest.fixture
18			def sample_json(tests_data_root):
19			return os.path.join(tests_data_root, 'sample-data.jsonlines')
20
21			@pytest.fixture
22			def sample_collaped_json(tests_data_root):
23			return os.path.join(tests_data_root, 'sample-data-collapsed.jsonlines')
24
25
26			@pytest.fixture()
27			def test_json_data(sample_json):
28			return {
29			'file_path': sample_json,
30			'nb_lines': 100,
31			'attributes': {'flavors', 'name', 'medical', 'description', 'image_urls', 'parents', 'negatives', 'grow_info', '_id', 'type', 'image_paths', 'effects'},
32			}
33
34
35			@pytest.fixture
36			def somagic():
37			from so_magic import init_so_magic
38			_ = init_so_magic()
39			return _
40
41
42			@pytest.fixture
43			def data_manager():
44			def getter():
45			from so_magic.data import init_data_manager
46			from so_magic.data.backend import init_engine
47			data_manager = init_data_manager(init_engine(engine_type='pd'))
48			return data_manager
49			return getter
50
51
52			@pytest.fixture
53			def read_observations():
54			"""Read a json lines formatted file and create the observations object (see Datapoints class)."""
55			def load_data(so_master, json_lines_formatted_file_path):
56			"""Create the observations object for a Datapoints instance, given a data file.
57
58			Args:
59			so_master (so_magic.so_master.SoMaster): an instance of SoMaster
60			json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
61			"""
62			cmd = so_master.command.observations_command
63			cmd.args = [json_lines_formatted_file_path]
64			cmd.execute()
65			return load_data
66
67
68			@pytest.fixture
69			def test_datapoints(read_observations, sample_collaped_json, somagic):
70			"""Read the designated json lines 'test file' (which contains the 'test observations') as a Datapoints instance."""
71			read_observations(somagic, sample_collaped_json)
72			return somagic.datapoints
73
74
75			@pytest.fixture
76			def test_dataset(somagic, read_observations, sample_collaped_json):
77			"""Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed."""
78			read_observations(somagic, sample_collaped_json)
79
80			type_values = ['hybrid', 'indica', 'sativa']
81			ATTRS2 = [f'type_{x}' for x in type_values]
82			from functools import reduce
83			UNIQUE_FLAVORS = reduce(lambda i, j: set(i).union(set(j)),
84			[_ for _ in somagic._data_manager.datapoints.observations['flavors'] if _ is not None])
85
86			cmd = somagic._data_manager.command.select_variables_command
87			# current limitations:
88			# 1. client code has to know the number of distict values for the nominal variable 'type'
89			# 2. client code has to provide the column names that will result after encoding the 'type' variable
90			cmd.args = [[
91			# current limitations:
92			# 1. client code has to know the number of distict values for the nominal variable 'type'
93			# 2. client code has to provide the column names that will result after encoding the 'type' variable
94			{'variable': 'type', 'columns': ATTRS2},
95			# current limitations:
96			# 1. client code has to know the number of distict values for the nominal variable 'flavors'
97			# 2. client code has to provide the column names that will result after encoding the 'flavors' variable
98			{'variable': 'flavors', 'columns': list(UNIQUE_FLAVORS)}]]
99			cmd.execute()
100
101			cmd = somagic._data_manager.command.one_hot_encoding_command
102			cmd.args = [somagic._data_manager.datapoints, 'type']
103			cmd.execute()
104
105			assert set([type(x) for x in somagic._data_manager.datapoints.observations['flavors']]) == {list, type(None)}
106
107			nb_columns_before = len(somagic._data_manager.datapoints.observations.columns)
108
109			cmd = somagic._data_manager.command.one_hot_encoding_list_command
110			cmd.args = [somagic._data_manager.datapoints, 'flavors']
111			cmd.execute()
112
113			assert nb_columns_before + len(UNIQUE_FLAVORS) == len(somagic._data_manager.datapoints.observations.columns)
114
115			import numpy as np
116			setattr(somagic.dataset, 'feature_vectors',
117			np.array(somagic._data_manager.datapoints.observations[ATTRS2 + list(UNIQUE_FLAVORS)]))
118
119			MAX_FLAVORS_PER_DAATPOINT = max(
120			[len(x) for x in [_ for _ in somagic._data_manager.datapoints.observations['flavors'] if type(_) is list]])
121			return somagic.dataset, type_values, UNIQUE_FLAVORS, MAX_FLAVORS_PER_DAATPOINT, nb_columns_before
122
123
124			@pytest.fixture
125			def built_in_backends():
126			from so_magic.data.backend.panda_handling.df_backend import magic_backends
127			engine_backends = magic_backends()
128			return engine_backends
129
130
131			@pytest.fixture
132			def tabular_operators(built_in_backends):
133			operators = {
134			'retriever': {
135			'class': built_in_backends.backend_interfaces['retriever']['class_registry'].subclasses['pd'],
136			'interface': {
137			'column': '(identifier, data)',
138			'row': '(identifier, data)',
139			'nb_columns': '(data)',
140			'nb_rows': '(data)',
141			'get_numerical_attributes': '(data)',
142			}
143			},
144			'iterator': {
145			'class': built_in_backends.backend_interfaces['iterator']['class_registry'].subclasses['pd'],
146			'interface': {
147			'columnnames': '(data)',
148			'itercolumns': '(data)',
149			'iterrows': '(data)',
150			},
151			},
152			'mutator': {
153			'class': built_in_backends.backend_interfaces['mutator']['class_registry'].subclasses['pd'],
154			'interface': {
155			'add_column': '(datapoints, values, new_attribute, **kwargs)',
156			},
157			},
158			}
159			return {
160			'operators': operators,
161			'reverse_dict': {operator_dict['class']: key for key, operator_dict in operators.items()},
162			'get_nb_args': lambda operator_interface_name, method_name: len(operators[operator_interface_name]['interface'][method_name].replace(', **kwargs', '').split(',')),
163			# operator_name_2_required_methods
164			'required_methods': iter(((operator_interface_name, v['interface'].keys())
			0 ignored issues – show Comprehensibility Best Practice introduced 2021-06-17 02:56 UTC by Report Bug Copy Issue Report The variable `operator_interface_name` does not seem to be defined. Loading history... Comprehensibility Best Practice introduced 2021-06-17 02:56 UTC by Report Bug Copy Issue Report The variable `v` does not seem to be defined. Loading history...
165			for operator_interface_name, v in operators.items()))
166			}
167
168
169			@pytest.fixture
170			def assert_different_objects():
171			def _assert_different_objects(objects):
172			assert len(set([id(x) for x in objects])) == len(objects)
173			return _assert_different_objects
174

boromir674 / so-magic

Push — master ( 96da92...a1b572 )

conftest.test_dataset() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like