Passed
Push — master ( 96da92...a1b572 )
by Konstantinos
37s queued 14s
created

test_dataset   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 62
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 37
dl 0
loc 62
rs 10
c 0
b 0
f 0
wmc 4

4 Functions

Rating   Name   Duplication   Size   Complexity  
A test_sanity_checks_on_dataset() 0 25 1
A assert_column_values() 0 8 1
A assert_selected_variables_are() 0 5 1
A assert_correct_nominal_variable_encoding() 0 14 1
1
import pytest
2
3
4
@pytest.fixture
def assert_selected_variables_are(somagic):
    """Provide a callable that asserts which variables are currently selected.

    The returned callable collects the ``'variable'`` entry of every item found in
    ``somagic._data_manager.feature_manager.feature_configuration.variables`` and
    asserts that the resulting set equals the expected ``variables`` set.
    """
    def _assert_selected_variables_are(variables: set):
        feature_configuration = somagic._data_manager.feature_manager.feature_configuration
        selected_variables = {entry['variable'] for entry in feature_configuration.variables}
        assert selected_variables == variables
    return _assert_selected_variables_are
9
10
11
@pytest.fixture
def assert_column_values(test_dataset):
    """Provide a callable that asserts the distinct values found in a column.

    The returned callable looks up ``attribute`` in
    ``test_dataset[0].datapoints.observations`` and checks that its distinct
    values equal ``set(expected_values)``, both by plain iteration over the
    column and through ``pandas.unique``.
    """
    import pandas as pd

    def _assert_column_values_are(attribute, expected_values):
        column = test_dataset[0].datapoints.observations[attribute]
        # Materialize once so a one-shot iterable is not exhausted by the first assertion.
        expected = set(expected_values)
        assert set(column) == expected
        assert set(pd.unique(column)) == expected
    return _assert_column_values_are
19
20
21
@pytest.fixture
def assert_correct_nominal_variable_encoding(test_dataset):
    """Test a column with each row having a string representing one of the possible values of an Attribute.

    Useful when an Attribute corresponds to a discrete Variable of type Nominal (ordering does not matter) and its
    observation (row) can have only one of the possible values.

    The returned callable receives the names of the encoded feature columns and asserts that, in every row of
    ``test_dataset[0].datapoints.observations``, exactly one of those columns holds 1 and all the others hold 0.
    """
    from collections import Counter

    def _assert_nominal_variable_encoded_as_expected(expected_feature_columns):
        # Build the expected tally once; it does not depend on the row.
        expected_counts = Counter({1: 1})
        nb_zeros = len(expected_feature_columns) - 1
        if nb_zeros:
            # Omit the zero-count entry for a single column: before Python 3.10, Counter equality
            # treated an explicit zero count as different from a missing key.
            expected_counts[0] = nb_zeros
        observations = test_dataset[0].datapoints.observations
        assert all(
            Counter([datarow[column] for column in expected_feature_columns]) == expected_counts
            for _, datarow in observations[expected_feature_columns].iterrows()
        )
    return _assert_nominal_variable_encoded_as_expected
35
36
37
def test_sanity_checks_on_dataset(test_dataset, assert_selected_variables_are, assert_column_values,
                                  assert_correct_nominal_variable_encoding):
    """Run sanity checks on the dataset supplied by the ``test_dataset`` fixture.

    ``test_dataset`` is read positionally:
      [0] the dataset object (exposes ``datapoints.observations`` and ``feature_vectors``)
      [1] the expected distinct values of the 'type' column
      [2] column names expected to exist in the observations
          (presumably the columns generated from 'flavors' -- confirm against the fixture)
      [3] the upper bound on the per-row sum over the columns of [2]
    """
    dataset = test_dataset[0]
    type_values = test_dataset[1]
    encoded_columns = test_dataset[2]
    max_row_sum = test_dataset[3]

    type_feature_columns = [f'type_{x}' for x in type_values]

    datapoints = dataset.datapoints
    assert_selected_variables_are({'type', 'flavors'})

    assert all(isinstance(x, str) for x in datapoints.observations['type'])

    assert_column_values('type', expected_values=type_values)

    assert_correct_nominal_variable_encoding(type_feature_columns)

    # The mix of list and float below is expected because test_dataset invokes the
    # 'one_hot_encoding_list_command' command, which unfortunately at the moment has a
    # side effect on the attribute it operates on.
    # side effect: _data_manager.datapoints.observations[_attribute].fillna(value=np.nan, inplace=True)
    assert {type(x) for x in datapoints.observations['flavors']} == {list, float}

    assert len(encoded_columns) > 5

    assert all(x in datapoints.observations.columns for x in encoded_columns)
    # Each row's sum over the encoded columns must lie within [0, max_row_sum].
    assert all(0 <= sum(datarow[column] for column in encoded_columns) <= max_row_sum
               for _, datarow in datapoints.observations[list(encoded_columns)].iterrows())

    assert hasattr(dataset, 'feature_vectors')
62