test_dataset - Code Metrics - Inspection of "wip" - boromir674/so-magic - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — mpeta ( 1841cb...62640f )

by Konstantinos

created 2021-07-01 16:30 UTC

test_dataset A

↳ Parent: tests.test_dataset

Complexity

Total Complexity

Size/Duplication

Total Lines	62
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	37
dl	0
loc	62
rs	10
c	0
b	0
f	0
wmc	4

import pytest


@pytest.fixture
def assert_selected_variables_are(somagic):
    """Provide a callable asserting which variables the feature configuration holds.

    The returned callable collects the 'variable' entry of every item found in
    ``somagic._data_manager.feature_manager.feature_configuration.variables`` and asserts that the
    resulting set is equal to the set passed in.
    """
    def _check_selected_variables(variables: set):
        feature_configuration = somagic._data_manager.feature_manager.feature_configuration
        selected = {entry['variable'] for entry in feature_configuration.variables}
        assert selected == variables
    return _check_selected_variables


@pytest.fixture
def assert_column_values(test_dataset):
    """Provide a callable asserting that a column's set of values equals the expected set.

    The column is read from ``test_dataset[0].datapoints.observations`` and compared twice: once
    through plain iteration over the column and once through ``pandas.unique``.
    """
    import pandas as pd

    def _check_column_values(attribute, expected_values):
        column = test_dataset[0].datapoints.observations[attribute]
        # Values as yielded by iterating the column directly
        assert set(column) == set(expected_values)
        # Values as reported by pandas' own de-duplication
        assert set(pd.unique(column)) == set(expected_values)
    return _check_column_values


@pytest.fixture
def assert_correct_nominal_variable_encoding(test_dataset):
    """Provide a callable verifying the encoding of a discrete Nominal variable.

    Intended for an Attribute whose observation (row) holds exactly one of the possible values (the
    ordering of the values does not matter). The returned callable receives the feature columns the
    encoding is expected to have produced and asserts that, in every row of
    ``test_dataset[0].datapoints.observations``, exactly one of those columns holds 1 while all the
    remaining ones hold 0.
    """
    from collections import Counter

    def _check_rows_are_one_hot(expected_feature_columns):
        encoded = test_dataset[0].datapoints.observations[expected_feature_columns]
        for _, row in encoded.iterrows():
            observed = Counter(row[column] for column in expected_feature_columns)
            # a single 1 and 0 everywhere else
            expected = Counter({0: len(expected_feature_columns) - 1, 1: 1})
            assert observed == expected
    return _check_rows_are_one_hot


def test_sanity_checks_on_dataset(test_dataset, assert_selected_variables_are, assert_column_values,
                                  assert_correct_nominal_variable_encoding):
    """Run a series of sanity checks on the value of the 'test_dataset' fixture.

    The fixture value is accessed positionally: [0] is the object exposing 'datapoints' (and expected
    to expose 'feature_vectors'), [1] holds the expected values of the 'type' column, [2] is a
    collection of column names expected in the observations and [3] is the upper bound allowed for the
    per-row sum over the [2] columns.
    """
    type_values = test_dataset[1]
    type_feature_columns = [f'type_{value}' for value in type_values]

    points = test_dataset[0].datapoints
    assert_selected_variables_are({'type', 'flavors'})

    # every 'type' observation must be exactly a str
    assert all(type(value) is str for value in points.observations['type'])

    assert_column_values('type', expected_values=type_values)

    assert_correct_nominal_variable_encoding(type_feature_columns)

    # Both list and float are expected below: test_dataset invokes the 'one_hot_encoding_list_command'
    # command, which unfortunately at the moment has a side effect on the attribute it operates on.
    # side effect: _data_manager.datapoints.observations[_attribute].fillna(value=np.nan, inplace=True)
    flavor_value_types = {type(value) for value in points.observations['flavors']}
    assert flavor_value_types == {list, float}

    flavor_columns = test_dataset[2]
    assert len(flavor_columns) > 5

    assert all(column in points.observations.columns for column in flavor_columns)
    # the per-row sum over the [2] columns must stay within [0, test_dataset[3]]
    flavor_frame = points.observations[list(flavor_columns)]
    for _, row in flavor_frame.iterrows():
        assert 0 <= sum(row[column] for column in flavor_columns) <= test_dataset[3]

    assert hasattr(test_dataset[0], 'feature_vectors')


1			import pytest
2
3
4			@pytest.fixture
5			def assert_selected_variables_are(somagic):
6			def _assert_selected_variables_are(variables: set):
7			assert set([x['variable'] for x in somagic._data_manager.feature_manager.feature_configuration.variables]) == variables
8			return _assert_selected_variables_are
9
10
11			@pytest.fixture
12			def assert_column_values(test_dataset):
13			"""Assert that the set of some tabular data column's values is equal to the given set."""
14			import pandas as pd
15
16			def _assert_column_values_are(attribute, expected_values):
17			assert set([_ for _ in test_dataset[0].datapoints.observations[attribute]]) == set(expected_values)
18			assert set(pd.unique(test_dataset[0].datapoints.observations[attribute])) == set(expected_values)
19			return _assert_column_values_are
20
21
22			@pytest.fixture
23			def assert_correct_nominal_variable_encoding(test_dataset):
24			"""Test a column with each row having a string representing one of the possible values of an Attribute.
25
26			Useful when an Attribute corresponds to a discreet Variable of type Nominal (ordering does not matter) and its
27			observation (row) can have only one of the possible values.
28			"""
29			from collections import Counter
30
31			def _assert_nominal_variable_encoded_as_expected(expected_feature_columns):
32			assert all(Counter([datarow[_] for _ in expected_feature_columns]) ==
33			Counter({0: len(expected_feature_columns) - 1, 1: 1})
34			for index, datarow in test_dataset[0].datapoints.observations[expected_feature_columns].iterrows())
35			return _assert_nominal_variable_encoded_as_expected
36
37
38			def test_sanity_checks_on_dataset(test_dataset, assert_selected_variables_are, assert_column_values,
39			assert_correct_nominal_variable_encoding):
40			ATTRS2 = [f'type_{x}' for x in test_dataset[1]]
41
42			datapoints = test_dataset[0].datapoints
43			assert_selected_variables_are({'type', 'flavors'})
44
45			assert all(type(x) == str for x in datapoints.observations['type'])
46
47			assert_column_values('type', expected_values=test_dataset[1])
48
49			assert_correct_nominal_variable_encoding(ATTRS2)
50
51			# the below is expected because test_dataset invokes the 'one_hot_encoding_list_command' command which unfortunately
52			# at the moment has a side effect on the attribute it operates on.
53			# side effect: _data_manager.datapoints.observations[_attribute].fillna(value=np.nan, inplace=True)
54			assert set([type(x) for x in datapoints.observations['flavors']]) == {list, float}
55
56			assert len(test_dataset[2]) > 5
57
58			assert all(x in datapoints.observations.columns for x in test_dataset[2])
59			assert all(0 <= sum([datarow[_] for _ in test_dataset[2]]) <= test_dataset[3]
60			for index, datarow in datapoints.observations[list(test_dataset[2])].iterrows())
61
62			assert hasattr(test_dataset[0], 'feature_vectors')
63

boromir674 / so-magic

Push — mpeta ( 1841cb...62640f )

test_dataset A

Complexity

Size/Duplication

Importance

Duplication Side-by-Side

Filter issues like