tests.conftest.test_dataset() - Code Metrics - Inspection of "feat(commands): create the encode and replace_empt..." - boromir674/so-magic - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — mpeta ( 62640f...eed483 )

by Konstantinos

created 2021-07-06 14:07 UTC

tests.conftest.test_dataset() B

↳ Parent: tests.conftest

Complexity

Conditions

Size

Total Lines	72
Code Lines	44

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	4
eloc	44
nop	3
dl	0
loc	72
rs	8.824
c	0
b	0
f	0

How to fix Long Method

from glob import glob
import pytest


def file_path_to_module_path(string: str) -> str:
    """Convert a relative Python file path into a dotted module path.

    Only a trailing ``.py`` extension is removed. Path components that merely
    contain the characters ".py" once separators become dots (for example a
    directory or file whose name starts with "py") are left intact.

    Args:
        string: file path using forward slashes and/or backslashes as separators

    Returns:
        The path with its ``.py`` suffix (if any) dropped and every path
        separator replaced by a dot.
    """
    extension = ".py"
    if string.endswith(extension):
        string = string[:-len(extension)]
    return string.replace("/", ".").replace("\\", ".")


# Modules that pytest should load as plugins: one dotted module path per file
# matched by the glob below (resolved relative to the current working directory).
# Paths containing a double underscore are skipped (presumably to leave out
# files such as __init__.py -- confirm).
pytest_plugins = [
    file_path_to_module_path(fixture_file)
    for fixture_file in glob("tests/fixtures/*.py")
    if "__" not in fixture_file
]


@pytest.fixture
def somagic():
    """Provide the object returned by a fresh call to ``so_magic.init_so_magic``."""
    from so_magic import init_so_magic
    return init_so_magic()


@pytest.fixture
def data_manager():
    """Provide a factory; every call to it builds a data manager on an engine of type 'pd'."""
    def getter():
        # Imported lazily, at call time, rather than when conftest is loaded
        from so_magic.data import init_data_manager
        from so_magic.data.backend import init_engine
        pd_engine = init_engine(engine_type='pd')
        return init_data_manager(pd_engine)
    return getter


@pytest.fixture
def read_observations():
    """Provide a callable that loads a json lines formatted file as observations (see Datapoints class)."""
    def load_data(so_master, json_lines_formatted_file_path):
        """Run the 'observations' command of the given SoMaster on a data file.

        Args:
            so_master (so_magic.so_master.SoMaster): an instance of SoMaster
            json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
        """
        observations_command = so_master.command.observations_command
        # The command receives the file path as its single argument
        observations_command.args = [json_lines_formatted_file_path]
        observations_command.execute()
    return load_data


@pytest.fixture
def test_datapoints(read_observations, sample_collaped_json, somagic):
    """Load the designated json lines 'test file' (the 'test observations') and return the resulting datapoints."""
    # Loading happens as a side effect on the somagic object; the datapoints are read back from it
    read_observations(somagic, sample_collaped_json)
    loaded_datapoints = somagic.datapoints
    return loaded_datapoints


def _nominal_variable(name, data_type):
    """Build a minimal stand-in object describing a nominal variable with the given name and data type."""
    return type('Variable', (object,), {
        'type': 'nominal',
        'data_type': data_type,
        '__str__': lambda self: name,
    })()


def _run_command(command, *args):
    """Set the given positional arguments on a command object and execute it."""
    command.args = list(args)
    command.execute()


@pytest.fixture
def test_dataset(somagic, read_observations, sample_collaped_json):
    """Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed.

    Loads the test observations, encodes the 'type' and 'flavors' variables, selects the
    resulting columns and stores them as the dataset's `feature_vectors` numpy array.

    Returns:
        tuple: (dataset, type values, set of unique flavors, maximum number of flavors found
        on a single datapoint, number of columns right before the 'flavors' encoding)
    """
    import numpy as np

    def datapoints():
        # Looked up anew on every use, since the executed commands may alter the underlying data
        return somagic._data_manager.datapoints

    read_observations(somagic, sample_collaped_json)

    type_values = ['hybrid', 'indica', 'sativa']
    expected_feature_names = [f'type_{x}' for x in type_values]

    # Union of all flavors over the datapoints that have any; unlike a pairwise reduce, this
    # also yields a set when there is exactly one (or no) such datapoint
    unique_flavors = set().union(
        *(flavors for flavors in datapoints().observations['flavors'] if flavors is not None))

    type_variable = _nominal_variable('type', str)
    flavors_variable = _nominal_variable('flavors', list)

    # Sanity checks on the freshly loaded data: 100 observations, 'type' not yet encoded
    assert len(datapoints()) == 100
    assert all(x not in datapoints().attributes for x in expected_feature_names)

    _run_command(somagic._data_manager.command.encode_command, type_variable)

    # The encoded 'type' columns are expected to be the last attributes
    runtime_feature_names = list(datapoints().attributes)[-len(expected_feature_names):]
    assert runtime_feature_names == expected_feature_names

    _run_command(somagic._data_manager.command.replace_empty_command, flavors_variable)

    # After replacing empty values every 'flavors' entry must be a list
    assert {type(x) for x in datapoints().observations['flavors']} == {list}

    nb_columns_before = len(datapoints().observations.columns)

    _run_command(somagic._data_manager.command.encode_command, flavors_variable)

    # Encoding 'flavors' must add exactly one column per unique flavor
    assert nb_columns_before + len(unique_flavors) == len(datapoints().observations.columns)

    # current limitations (they apply to both the 'type' and the 'flavors' variable):
    # 1. client code has to know the number of distinct values for the nominal variable
    # 2. client code has to provide the column names that will result after encoding the variable
    _run_command(somagic._data_manager.command.select_variables_command, [
        {'variable': 'type', 'columns': runtime_feature_names},
        {'variable': 'flavors', 'columns': list(unique_flavors)},
    ])

    flavor_feature_names = ['flavors_' + x for x in unique_flavors]
    somagic.dataset.feature_vectors = np.array(
        datapoints().observations[runtime_feature_names + flavor_feature_names])

    max_flavors_per_datapoint = max(
        len(x) for x in datapoints().observations['flavors'] if isinstance(x, list))
    return somagic.dataset, type_values, unique_flavors, max_flavors_per_datapoint, nb_columns_before


@pytest.fixture
def built_in_backends():
    """Provide the object returned by a fresh call to ``magic_backends()``."""
    from so_magic.data.backend.panda_handling.df_backend import magic_backends
    return magic_backends()


@pytest.fixture
def tabular_operators(built_in_backends):
    """Describe the 'pd' implementations of the retriever, iterator and mutator interfaces.

    Returns:
        dict: with keys
            'operators': per interface name, the registered 'pd' class and a mapping of
                method name to its argument list written as a string
            'reverse_dict': mapping of each registered class back to its interface name
            'get_nb_args': callable (operator_interface_name, method_name) giving the number
                of comma-separated arguments of that method, not counting '**kwargs'
            'required_methods': single-use iterator of (interface name, method names) pairs
    """
    def pd_implementation(interface_name):
        registry = built_in_backends.backend_interfaces[interface_name]['class_registry']
        return registry.subclasses['pd']

    operators = {
        'retriever': {
            'class': pd_implementation('retriever'),
            'interface': {
                'column': '(identifier, data)',
                'row': '(identifier, data)',
                'nb_columns': '(data)',
                'nb_rows': '(data)',
                'get_numerical_attributes': '(data)',
            },
        },
        'iterator': {
            'class': pd_implementation('iterator'),
            'interface': {
                'columnnames': '(data)',
                'itercolumns': '(data)',
                'iterrows': '(data)',
            },
        },
        'mutator': {
            'class': pd_implementation('mutator'),
            'interface': {
                'add_column': '(datapoints, values, new_attribute, **kwargs)',
                'add_columns': '(datapoints, values, column_names, **kwargs)',
            },
        },
    }

    def get_nb_args(operator_interface_name, method_name):
        signature = operators[operator_interface_name]['interface'][method_name]
        # '**kwargs' is not counted as an argument
        return len(signature.replace(', **kwargs', '').split(','))

    reverse_dict = {}
    for interface_name, specification in operators.items():
        reverse_dict[specification['class']] = interface_name

    # operator_name_2_required_methods; a generator, hence it can be consumed only once
    required_methods = (
        (interface_name, specification['interface'].keys())
        for interface_name, specification in operators.items()
    )

    return {
        'operators': operators,
        'reverse_dict': reverse_dict,
        'get_nb_args': get_nb_args,
        'required_methods': required_methods,
    }


@pytest.fixture
def assert_different_objects():
    """Provide a helper asserting that no object appears more than once (by identity) in a collection."""
    def _assert_different_objects(objects):
        # As many distinct ids as items means every item is a different object
        distinct_ids = {id(item) for item in objects}
        assert len(distinct_ids) == len(objects)
    return _assert_different_objects


1			from glob import glob
2			import pytest
3
4
5			def file_path_to_module_path(string: str) -> str:
6			return string.replace("/", ".").replace("\\", ".").replace(".py", "")
7
8
9			pytest_plugins = [
10			file_path_to_module_path(fixture) for fixture in glob("tests/fixtures/*.py") if "__" not in fixture
11			]
12
13
14			@pytest.fixture
15			def somagic():
16			from so_magic import init_so_magic
17			_ = init_so_magic()
18			return _
19
20
21			@pytest.fixture
22			def data_manager():
23			def getter():
24			from so_magic.data import init_data_manager
25			from so_magic.data.backend import init_engine
26			data_manager = init_data_manager(init_engine(engine_type='pd'))
27			return data_manager
28			return getter
29
30
31			@pytest.fixture
32			def read_observations():
33			"""Read a json lines formatted file and create the observations object (see Datapoints class)."""
34			def load_data(so_master, json_lines_formatted_file_path):
35			"""Create the observations object for a Datapoints instance, given a data file.
36
37			Args:
38			so_master (so_magic.so_master.SoMaster): an instance of SoMaster
39			json_lines_formatted_file_path (str): path to a json lines formatted file with the observations data
40			"""
41			cmd = so_master.command.observations_command
42			cmd.args = [json_lines_formatted_file_path]
43			cmd.execute()
44			return load_data
45
46
47			@pytest.fixture
48			def test_datapoints(read_observations, sample_collaped_json, somagic):
49			"""Read the designated json lines 'test file' (which contains the 'test observations') as a Datapoints instance."""
50			read_observations(somagic, sample_collaped_json)
51			return somagic.datapoints
52
53
54			@pytest.fixture
55			def test_dataset(somagic, read_observations, sample_collaped_json):
56			"""Dataset ready to be fed into a training/inference algorithm; feature vectors have been computed."""
57			read_observations(somagic, sample_collaped_json)
58
59			type_values = ['hybrid', 'indica', 'sativa']
60			expected_feature_names = [f'type_{x}' for x in type_values]
61			from functools import reduce
62			UNIQUE_FLAVORS = reduce(lambda i, j: set(i).union(set(j)),
63			[_ for _ in somagic._data_manager.datapoints.observations['flavors'] if _ is not None])
64
65			variables = type('Variables', (object,), {
66			'type': type('Variable', (object,), {
67			'type': 'nominal',
68			'data_type': str,
69			'__str__': lambda self: 'type',
70			}
71			)(),
72			'flavors': type('Variable', (object,), {
73			'type': 'nominal',
74			'data_type': list,
75			'__str__': lambda self: 'flavors',
76			}
77			)(),
78			})
79
80			assert len(somagic._data_manager.datapoints) == 100
81			assert all(x not in somagic._data_manager.datapoints.attributes for x in expected_feature_names)
82
83			cmd = somagic._data_manager.command.encode_command
84			cmd.args = [variables.type]
85			cmd.execute()
86
87			runtime_feature_names = list(somagic._data_manager.datapoints.attributes)[-len(expected_feature_names):]
88			assert runtime_feature_names == expected_feature_names
89
90			cmd = somagic._data_manager.command.replace_empty_command
91			cmd.args = [variables.flavors]
92			cmd.execute()
93
94			assert set([type(x) for x in somagic._data_manager.datapoints.observations['flavors']]) == {list}
95
96			nb_columns_before = len(somagic._data_manager.datapoints.observations.columns)
97
98			cmd = somagic._data_manager.command.encode_command
99			cmd.args = [variables.flavors]
100			cmd.execute()
101
102			assert nb_columns_before + len(UNIQUE_FLAVORS) == len(somagic._data_manager.datapoints.observations.columns)
103
104			cmd = somagic._data_manager.command.select_variables_command
105			# current limitations:
106			# 1. client code has to know the number of distict values for the nominal variable 'type'
107			# 2. client code has to provide the column names that will result after encoding the 'type' variable
108			cmd.args = [[
109			# current limitations:
110			# 1. client code has to know the number of distict values for the nominal variable 'type'
111			# 2. client code has to provide the column names that will result after encoding the 'type' variable
112			{'variable': 'type', 'columns': runtime_feature_names},
113			# current limitations:
114			# 1. client code has to know the number of distict values for the nominal variable 'flavors'
115			# 2. client code has to provide the column names that will result after encoding the 'flavors' variable
116			{'variable': 'flavors', 'columns': list(UNIQUE_FLAVORS)}]]
117			cmd.execute()
118
119			import numpy as np
120			setattr(somagic.dataset, 'feature_vectors',
121			np.array(somagic._data_manager.datapoints.observations[runtime_feature_names + ['flavors_' + x for x in UNIQUE_FLAVORS]]))
122
123			MAX_FLAVORS_PER_DAATPOINT = max(
124			[len(x) for x in [_ for _ in somagic._data_manager.datapoints.observations['flavors'] if type(_) is list]])
125			return somagic.dataset, type_values, UNIQUE_FLAVORS, MAX_FLAVORS_PER_DAATPOINT, nb_columns_before
126
127
128			@pytest.fixture
129			def built_in_backends():
130			from so_magic.data.backend.panda_handling.df_backend import magic_backends
131			engine_backends = magic_backends()
132			return engine_backends
133
134
135			@pytest.fixture
136			def tabular_operators(built_in_backends):
137			operators = {
138			'retriever': {
139			'class': built_in_backends.backend_interfaces['retriever']['class_registry'].subclasses['pd'],
140			'interface': {
141			'column': '(identifier, data)',
142			'row': '(identifier, data)',
143			'nb_columns': '(data)',
144			'nb_rows': '(data)',
145			'get_numerical_attributes': '(data)',
146			}
147			},
148			'iterator': {
149			'class': built_in_backends.backend_interfaces['iterator']['class_registry'].subclasses['pd'],
150			'interface': {
151			'columnnames': '(data)',
152			'itercolumns': '(data)',
153			'iterrows': '(data)',
154			},
155			},
156			'mutator': {
157			'class': built_in_backends.backend_interfaces['mutator']['class_registry'].subclasses['pd'],
158			'interface': {
159			'add_column': '(datapoints, values, new_attribute, **kwargs)',
160			'add_columns': '(datapoints, values, column_names, **kwargs)',
161			},
162			},
163			}
164			return {
165			'operators': operators,
166			'reverse_dict': {operator_dict['class']: key for key, operator_dict in operators.items()},
167			'get_nb_args': lambda operator_interface_name, method_name: len(operators[operator_interface_name]['interface'][method_name].replace(', **kwargs', '').split(',')),
168			# operator_name_2_required_methods
169			'required_methods': iter(((operator_interface_name, v['interface'].keys())
			0 ignored issues – show Comprehensibility Best Practice introduced 2021-06-17 02:56 UTC by Report Bug Copy Issue Report The variable `v` does not seem to be defined. Loading history... Comprehensibility Best Practice introduced 2021-06-17 02:56 UTC by Report Bug Copy Issue Report The variable `operator_interface_name` does not seem to be defined. Loading history...
170			for operator_interface_name, v in operators.items()))
171			}
172
173
174			@pytest.fixture
175			def assert_different_objects():
176			def _assert_different_objects(objects):
177			assert len(set([id(x) for x in objects])) == len(objects)
178			return _assert_different_objects
179

boromir674 / so-magic

Push — mpeta ( 62640f...eed483 )

tests.conftest.test_dataset() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like