1
|
|
|
import pytest |
2
|
|
|
|
3
|
|
|
|
4
|
|
|
@pytest.fixture
def assert_selected_variables_are(somagic):
    """Provide a checker for the variables held by the feature configuration.

    The returned callable gathers the 'variable' entry of every item in
    ``somagic._data_manager.feature_manager.feature_configuration.variables`` and asserts that, taken as a set,
    they are equal to the ``variables`` set it receives.
    """
    def _assert_selected_variables_are(variables: set):
        feature_configuration = somagic._data_manager.feature_manager.feature_configuration
        selected_variables = {entry['variable'] for entry in feature_configuration.variables}
        assert selected_variables == variables

    return _assert_selected_variables_are
9
|
|
|
|
10
|
|
|
|
11
|
|
|
@pytest.fixture
def assert_column_values(test_dataset):
    """Provide a checker for the distinct values found in one observations column.

    The returned callable reads ``test_dataset[0].datapoints.observations[attribute]`` and asserts that its distinct
    values match ``set(expected_values)``. The comparison is made twice: once on the values obtained by plain
    iteration and once on the values reported by ``pandas.unique``.
    """
    import pandas as pd

    def _assert_column_values_are(attribute, expected_values):
        # distinct values as seen by iterating over the column
        values_by_iteration = set(test_dataset[0].datapoints.observations[attribute])
        assert values_by_iteration == set(expected_values)

        # distinct values as computed by pandas
        values_by_pandas = set(pd.unique(test_dataset[0].datapoints.observations[attribute]))
        assert values_by_pandas == set(expected_values)

    return _assert_column_values_are
19
|
|
|
|
20
|
|
|
|
21
|
|
|
@pytest.fixture
def assert_correct_nominal_variable_encoding(test_dataset):
    """Provide a checker for the columns that encode a single Attribute, one column per possible value.

    Useful when an Attribute corresponds to a discrete Variable of type Nominal (ordering does not matter) and each
    observation (row) can have only one of the possible values.

    The returned callable receives the expected feature columns and asserts that, in every row of
    ``test_dataset[0].datapoints.observations``, exactly one of those columns holds 1 while all the others hold 0.
    """
    from collections import Counter

    def _assert_nominal_variable_encoded_as_expected(expected_feature_columns):
        def _has_single_active_column(datarow):
            # count how many times each value appears across the expected columns of this row
            observed_counts = Counter(datarow[column] for column in expected_feature_columns)
            return observed_counts == Counter({0: len(expected_feature_columns) - 1, 1: 1})

        rows = test_dataset[0].datapoints.observations[expected_feature_columns].iterrows()
        assert all(_has_single_active_column(datarow) for _, datarow in rows)

    return _assert_nominal_variable_encoded_as_expected
35
|
|
|
|
36
|
|
|
|
37
|
|
|
def test_sanity_checks_on_dataset(test_dataset, assert_selected_variables_are, assert_column_values,
                                  assert_correct_nominal_variable_encoding):
    """Sanity-check the dataset provided by the ``test_dataset`` fixture.

    As used below, ``test_dataset`` is indexed as follows:
      [0] the dataset object, exposing ``datapoints`` and expected to have ``feature_vectors``
      [1] the expected values of the 'type' column (also used to build the 'type_<value>' column names)
      [2] names of columns expected to be present in the observations (more than 5 of them)
      [3] the upper bound for the per-row sum over the columns listed in [2]
    """
    type_feature_columns = [f'type_{type_value}' for type_value in test_dataset[1]]

    datapoints = test_dataset[0].datapoints
    assert_selected_variables_are({'type', 'flavors'})

    # every value in the 'type' column must be exactly of type str
    for type_value in datapoints.observations['type']:
        assert type(type_value) is str

    assert_column_values('type', expected_values=test_dataset[1])

    assert_correct_nominal_variable_encoding(type_feature_columns)

    # the below is expected because test_dataset invokes the 'one_hot_encoding_list_command' command which
    # unfortunately at the moment has a side effect on the attribute it operates on.
    # side effect: _data_manager.datapoints.observations[_attribute].fillna(value=np.nan, inplace=True)
    flavor_value_types = {type(flavor_value) for flavor_value in datapoints.observations['flavors']}
    assert flavor_value_types == {list, float}

    assert len(test_dataset[2]) > 5

    # each listed column must exist in the observations
    for column in test_dataset[2]:
        assert column in datapoints.observations.columns

    # per row, the sum over the listed columns must lie between 0 and test_dataset[3] (inclusive)
    rows = datapoints.observations[list(test_dataset[2])].iterrows()
    assert all(0 <= sum(datarow[column] for column in test_dataset[2]) <= test_dataset[3]
               for _, datarow in rows)

    assert hasattr(test_dataset[0], 'feature_vectors')
62
|
|
|
|