Passed
Push — mpeta ( 4519ef...1841cb )
by Konstantinos
01:23
created

test_encoding.eliminate_nan_n_None_command()   B

Complexity

Conditions 5

Size

Total Lines 24
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 19
nop 2
dl 0
loc 24
rs 8.9833
c 0
b 0
f 0
1
import pytest
2
import math
3
from collections import Counter
4
import numpy as np
5
from pandas import isnull
6
7
@pytest.fixture
def eliminate_nan_n_None_command(somagic, test_datapoints):
    """Register a missing-value clean-up command on ``somagic`` and return it.

    The nested function is passed through
    ``somagic.commands_decorators.data_manager_command()`` and the resulting
    command object is then looked up on ``somagic.command`` under the nested
    function's ``__name__``.

    ``test_datapoints`` is requested but never read here.
    NOTE(review): presumably it is needed only for its set-up side effect
    (loading the datapoints) -- confirm against the fixture definition.
    """

    def _eliminate_nan_n_None_command(_data_manager, datapoints, attribute):
        """Replace null entries of column ``attribute`` with an empty list.

        The most frequent value type in the column must be ``list``; every
        value that is not exactly of that type and that ``pandas.isnull``
        reports as null is swapped for a fresh empty instance of that type.
        Asserts afterwards that the whole column has the majority type.
        """
        type_counts = Counter(type(x) for x in datapoints.column(attribute))
        majority_type, _ = type_counts.most_common(1)[0]
        assert majority_type is list

        def check(x, target_type):
            """Return ``x`` unchanged, or ``target_type()`` when ``x`` is null."""
            if type(x) is target_type:
                return x
            try:
                if isnull(x):
                    return target_type()
            except ValueError:
                # isnull() yields an array for array-like input, and taking
                # the truth value of such an array can raise ValueError;
                # those values are deliberately left as they are.
                pass
            return x

        datapoints.observations[attribute] = datapoints.observations[attribute].map(
            lambda a: check(a, majority_type)
        )
        assert all(type(x) is majority_type for x in datapoints.column(attribute))

    somagic.commands_decorators.data_manager_command()(_eliminate_nan_n_None_command)
    return getattr(somagic.command, _eliminate_nan_n_None_command.__name__)
31
32
33
def test_encoding_list_nominal(somagic, test_datapoints,
                               eliminate_nan_n_None_command,
                               test_json_data):
    """Clean the 'flavors' column of missing values, then encode it.

    Steps:
      1. Check that exactly one datapoints object is registered.
      2. Check that 'flavors' initially holds 98 lists and 2 ``None`` values.
      3. Run the clean-up command and check that all 100 values are lists.
      4. Run ``encode_nominal_subsets_command`` from 'flavors' into
         'encoded_flavors'.

    ``test_datapoints`` and ``test_json_data`` are requested but not read
    by the active code.
    NOTE(review): presumably they are kept for their set-up side effects
    -- confirm against the fixture definitions.
    """
    dt_manager = somagic._data_manager
    assert len(dt_manager.engine.datapoints_manager.datapoints_registry.objects) == 1

    c = Counter(type(x) for x in dt_manager.datapoints.column('flavors'))
    assert c == Counter({list: 98, type(None): 2})

    cmd = eliminate_nan_n_None_command
    cmd.args = [somagic.datapoints, 'flavors']
    cmd.execute()

    c = Counter(type(x) for x in dt_manager.datapoints.column('flavors'))
    assert c == Counter({list: 100})
    assert all(type(x) is list for x in dt_manager.datapoints.column('flavors'))

    cmd = dt_manager.command.encode_nominal_subsets_command
    cmd.args = [dt_manager.datapoints, 'flavors', 'encoded_flavors']
    cmd.execute()

    # TODO: the encoding step is currently unverified; re-enable once the
    # expected attribute set is settled:
    # assert set(dt_manager.datapoints.attributes) == set(_ for _ in list(test_json_data['attributes']) + ['encoded_flavors'])
56