test_dataset - Code Metrics - Inspection of "Merge pull request #33 from boromir674/release" - boromir674/so-magic - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 96da92...a1b572 )

by Konstantinos

created 2021-06-24 15:13 UTC

test_dataset A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	62
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	37
dl	0
loc	62
rs	10
c	0
b	0
f	0
wmc	4

4 Functions

Rating	Name	Size	Complexity
A	test_sanity_checks_on_dataset()	25	1
A	assert_column_values()	8	1
A	assert_selected_variables_are()	5	1
A	assert_correct_nominal_variable_encoding()	14	1

import pytest


@pytest.fixture
def assert_selected_variables_are(somagic):
    """Provide a callable asserting which variables are currently selected.

    The returned callable reads the 'variable' entry of every item found in
    ``somagic._data_manager.feature_manager.feature_configuration.variables``
    and asserts that, taken as a set, they equal the given ``variables``.
    """
    def _assert_selected_variables_are(variables: set):
        configuration = somagic._data_manager.feature_manager.feature_configuration
        selected = {entry['variable'] for entry in configuration.variables}
        assert selected == variables
    return _assert_selected_variables_are


@pytest.fixture
def assert_column_values(test_dataset):
    """Provide a callable asserting the distinct values found in a column.

    The returned callable takes an ``attribute`` (column name of
    ``test_dataset[0].datapoints.observations``) and an iterable of
    ``expected_values``. It asserts that the distinct values of that column
    equal the expected ones, once by iterating the raw column and once through
    ``pandas.unique``.
    """
    import pandas as pd

    def _assert_column_values_are(attribute, expected_values):
        # Materialise the expectation once: a one-shot iterable (e.g. a generator)
        # would otherwise be exhausted by the first check, leaving the second one
        # to compare against an empty set.
        expected = set(expected_values)
        column = test_dataset[0].datapoints.observations[attribute]

        raw_values = set(column)
        assert raw_values == expected, \
            f"Column '{attribute}' holds values {raw_values!r}; expected {expected!r}"

        unique_values = set(pd.unique(column))
        assert unique_values == expected, \
            f"Column '{attribute}' has unique values {unique_values!r}; expected {expected!r}"
    return _assert_column_values_are


@pytest.fixture
def assert_correct_nominal_variable_encoding(test_dataset):
    """Provide a callable asserting that a nominal attribute is one-hot encoded.

    Useful when an Attribute corresponds to a discrete Variable of type Nominal (ordering does not matter) and its
    observation (row) can have only one of the possible values. In that case, across the encoded feature columns,
    every row must hold exactly one 1 and 0 in all the remaining columns.

    The returned callable takes the list of encoded column names (``expected_feature_columns``) and raises an
    AssertionError for the first row of ``test_dataset[0].datapoints.observations`` that violates the encoding.
    """
    from collections import Counter

    def _assert_nominal_variable_encoded_as_expected(expected_feature_columns):
        # Build the expected tally without a zero-count entry. Before Python 3.10 Counter equality was plain dict
        # equality, so Counter({0: 0, 1: 1}) (the single-column case) never equalled the observed Counter({1: 1}).
        expected_counts = Counter({1: 1})
        nb_zeros = len(expected_feature_columns) - 1
        if nb_zeros != 0:
            expected_counts[0] = nb_zeros

        encoded_columns = test_dataset[0].datapoints.observations[expected_feature_columns]
        for row_label, datarow in encoded_columns.iterrows():
            observed_counts = Counter(datarow[column] for column in expected_feature_columns)
            assert observed_counts == expected_counts, (
                f"Row {row_label!r} is not one-hot encoded over {list(expected_feature_columns)!r}: "
                f"value counts are {dict(observed_counts)!r}, expected {dict(expected_counts)!r}"
            )
    return _assert_nominal_variable_encoded_as_expected


def test_sanity_checks_on_dataset(test_dataset, assert_selected_variables_are, assert_column_values,
                                  assert_correct_nominal_variable_encoding):
    """Sanity-check the dataset built by the ``test_dataset`` fixture.

    ``test_dataset`` is accessed positionally:
      [0] the dataset object (exposes ``datapoints`` and is expected to have ``feature_vectors``)
      [1] the expected distinct values of the 'type' column (also used to derive the 'type_<value>' columns)
      [2] a sized collection of column names expected in the observations (presumably the encoded 'flavors'
          columns -- TODO confirm against the fixture)
      [3] the upper bound for the per-row sum over the columns in [2]
    """
    type_values = test_dataset[1]
    type_feature_columns = [f'type_{value}' for value in type_values]

    dataset = test_dataset[0]
    datapoints = dataset.datapoints
    assert_selected_variables_are({'type', 'flavors'})

    # exact type check on purpose: every 'type' observation must be a plain str
    unexpected_types = {type(value) for value in datapoints.observations['type'] if type(value) is not str}
    assert not unexpected_types, f"Non-str values found in column 'type': {unexpected_types!r}"

    assert_column_values('type', expected_values=type_values)

    assert_correct_nominal_variable_encoding(type_feature_columns)

    # the below is expected because test_dataset invokes the 'one_hot_encoding_list_command' command which unfortunately
    # at the moment has a side effect on the attribute it operates on.
    # side effect: _data_manager.datapoints.observations[_attribute].fillna(value=np.nan, inplace=True)
    assert {type(value) for value in datapoints.observations['flavors']} == {list, float}

    encoded_columns = test_dataset[2]
    assert len(encoded_columns) > 5

    missing_columns = [column for column in encoded_columns if column not in datapoints.observations.columns]
    assert not missing_columns, f"Columns missing from the observations: {missing_columns!r}"

    max_row_sum = test_dataset[3]
    for row_label, datarow in datapoints.observations[list(encoded_columns)].iterrows():
        row_sum = sum(datarow[column] for column in encoded_columns)
        assert 0 <= row_sum <= max_row_sum, \
            f"Row {row_label!r} sums to {row_sum!r}, outside the range [0, {max_row_sum!r}]"

    assert hasattr(dataset, 'feature_vectors')


1			import pytest
2
3
4			@pytest.fixture
5			def assert_selected_variables_are(somagic):
6			def _assert_selected_variables_are(variables: set):
7			assert set([x['variable'] for x in somagic._data_manager.feature_manager.feature_configuration.variables]) == variables
8			return _assert_selected_variables_are
9
10
11			@pytest.fixture
12			def assert_column_values(test_dataset):
13			import pandas as pd
14
15			def _assert_column_values_are(attribute, expected_values):
16			assert set([_ for _ in test_dataset[0].datapoints.observations[attribute]]) == set(expected_values)
17			assert set(pd.unique(test_dataset[0].datapoints.observations[attribute])) == set(expected_values)
18			return _assert_column_values_are
19
20
21			@pytest.fixture
22			def assert_correct_nominal_variable_encoding(test_dataset):
23			"""Test a column with each row having a string representing one of the possible values of an Attribute.
24
25			Useful when an Attribute corresponds to a discrete Variable of type Nominal (ordering does not matter) and its
26			observation (row) can have only one of the possible values.
27			"""
28			from collections import Counter
29
30			def _assert_nominal_variable_encoded_as_expected(expected_feature_columns):
31			assert all(Counter([datarow[_] for _ in expected_feature_columns]) ==
32			Counter({0: len(expected_feature_columns) - 1, 1: 1})
33			for index, datarow in test_dataset[0].datapoints.observations[expected_feature_columns].iterrows())
34			return _assert_nominal_variable_encoded_as_expected
35
36
37			def test_sanity_checks_on_dataset(test_dataset, assert_selected_variables_are, assert_column_values,
38			assert_correct_nominal_variable_encoding):
39			ATTRS2 = [f'type_{x}' for x in test_dataset[1]]
40
41			datapoints = test_dataset[0].datapoints
42			assert_selected_variables_are({'type', 'flavors'})
43
44			assert all(type(x) == str for x in datapoints.observations['type'])
45
46			assert_column_values('type', expected_values=test_dataset[1])
47
48			assert_correct_nominal_variable_encoding(ATTRS2)
49
50			# the below is expected because test_dataset invokes the 'one_hot_encoding_list_command' command which unfortunately
51			# at the moment has a side effect on the attribute it operates on.
52			# side effect: _data_manager.datapoints.observations[_attribute].fillna(value=np.nan, inplace=True)
53			assert set([type(x) for x in datapoints.observations['flavors']]) == {list, float}
54
55			assert len(test_dataset[2]) > 5
56
57			assert all(x in datapoints.observations.columns for x in test_dataset[2])
58			assert all(0 <= sum([datarow[_] for _ in test_dataset[2]]) <= test_dataset[3]
59			for index, datarow in datapoints.observations[list(test_dataset[2])].iterrows())
60
61			assert hasattr(test_dataset[0], 'feature_vectors')
62

boromir674 / so-magic

Push — master ( 96da92...a1b572 )

test_dataset A

Complexity

Size/Duplication

Importance

4 Functions

Duplication Side-by-Side

Filter issues like