Passed
Push — mpeta ( 1841cb...62640f )
by Konstantinos
03:46
created

test_encoding.eliminate_nan_n_None_command()   B

Complexity

Conditions 5

Size

Total Lines 24
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 19
nop 2
dl 0
loc 24
rs 8.9833
c 0
b 0
f 0
1
import pytest
2
import math
3
from collections import Counter
4
import numpy as np
5
from pandas import isnull
6
7
@pytest.fixture
def eliminate_nan_n_None_command(somagic, test_datapoints):
    """Provide a command that replaces missing values in a list-valued column.

    The inner function is passed through
    ``somagic.commands_decorators.data_manager_command()`` and the object
    found on ``somagic.command`` under the function's name is returned.
    NOTE(review): presumably the decorator is what registers the command
    on ``somagic.command`` -- confirm against the somagic package.

    ``test_datapoints`` is not used directly; it is requested so that the
    fixture is set up before this one (presumably it loads the data --
    TODO confirm).
    """

    def _eliminate_nan_n_None_command(_data_manager, datapoints, attribute):
        """Replace null entries of ``attribute`` with an empty majority-type value.

        The most frequent value type in the column must be ``list``; every
        entry that ``pandas.isnull`` reports as missing (``None``/``nan``)
        is swapped for ``list()``.
        """
        type_counts = Counter(type(x) for x in datapoints.column(attribute))
        majority_type, _ = type_counts.most_common(1)[0]
        assert majority_type is list

        def check(x, target_type):
            # Values that already have the target type are kept untouched.
            # Calling isnull() on a list yields an element-wise array, so a
            # one-element list such as [None] would otherwise evaluate as
            # "missing" and be wiped, and an empty list would hit NumPy's
            # empty-array truthiness deprecation.
            if isinstance(x, target_type):
                return x
            try:
                missing = bool(isnull(x))  # True for both np.nan and None
            except ValueError:
                # Multi-element array-like: truth value is ambiguous, so it
                # is not a scalar missing marker; leave it as it is.
                return x
            return target_type() if missing else x

        column = datapoints.observations[attribute]
        datapoints.observations[attribute] = column.map(lambda a: check(a, majority_type))
        # Exact type comparison on purpose: it mirrors how majority_type was counted.
        assert all(type(x) is majority_type for x in datapoints.column(attribute))

    somagic.commands_decorators.data_manager_command()(_eliminate_nan_n_None_command)
    return getattr(somagic.command, _eliminate_nan_n_None_command.__name__)
27
28
29
def test_encoding_list_nominal(somagic, test_datapoints,
                               eliminate_nan_n_None_command,
                               ):
    """Clean the list-valued 'flavors' column, then encode it.

    Steps: verify exactly one datapoints object is registered, verify the
    raw column holds 98 lists and 2 ``None`` values, run the null-elimination
    command so that all 100 entries are lists, and finally execute the
    ``encode_nominal_subsets_command`` writing to 'encoded_flavors'.
    """
    dt_manager = somagic._data_manager
    assert len(dt_manager.engine.datapoints_manager.datapoints_registry.objects) == 1

    # Before cleaning: two entries are missing (None).
    c = Counter(type(x) for x in dt_manager.datapoints.column('flavors'))
    assert c == Counter({list: 98, type(None): 2})

    cmd = eliminate_nan_n_None_command
    cmd.args = [somagic.datapoints, 'flavors']
    cmd.execute()

    # After cleaning: every entry must be a list.
    c = Counter(type(x) for x in dt_manager.datapoints.column('flavors'))
    assert c == Counter({list: 100})
    assert all(type(x) is list for x in dt_manager.datapoints.column('flavors'))

    cmd = dt_manager.command.encode_nominal_subsets_command
    cmd.args = [dt_manager.datapoints, 'flavors', 'encoded_flavors']
    cmd.execute()

    # TODO: assert on the outcome of the encoding; the previously disabled check was:
    # assert set(dt_manager.datapoints.attributes) == set(_ for _ in list(test_json_data['attributes']) + ['encoded_flavors'])
52