Passed
Pull Request — dev (#32)
by Konstantinos
05:57 queued 03:46
created

test_discretization.test_discretizer()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 6
nop 0
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
import pytest
2
3
4
@pytest.fixture
def data_manager_command_decorators(somagic):
    """Expose two command decorators of ``somagic`` under short keys.

    Returns a dict mapping ``'data_manager_cmd'`` and ``'arbitrary_cmd'`` to
    the ``data_manager_command`` and ``arbitrary_command`` attributes of
    ``somagic.commands_decorators`` respectively.
    """
    decorators = {}
    decorators['data_manager_cmd'] = somagic.commands_decorators.data_manager_command
    decorators['arbitrary_cmd'] = somagic.commands_decorators.arbitrary_command
    return decorators
10
11
12
@pytest.fixture
def define_command():
    """Provide a callable that applies a decorator to a command function."""
    def _apply_decorator(decorator, command_function):
        # The decorator is invoked for its effect only; whatever it returns
        # is discarded and the helper itself returns None.
        decorator(command_function)

    return _apply_decorator
17
18
19
@pytest.fixture
def get_command(somagic):
    """Provide a by-name lookup of attributes on ``somagic.command``."""
    def _lookup(command_name: str):
        command_registry = somagic.command
        return getattr(command_registry, command_name)

    return _lookup
24
25
26
@pytest.fixture
def test_discretizer():
    """Build a ``Discretizer`` from the built-in ``'pd.cut'`` binning algorithm."""
    from so_magic.data.discretization import Discretizer, BinningAlgorithm

    binning_algorithm = BinningAlgorithm.from_built_in('pd.cut')
    return Discretizer.from_algorithm(binning_algorithm)
34
35
36
@pytest.fixture
def discretize_command():
    """Provide a factory that wraps a discretizer into a command function.

    The factory takes a discretizer and returns a function that discretizes
    one attribute of the given datapoints and stores the ``'result'`` entry
    of the discretizer's output as a new column on
    ``data_manager.datapoints``.
    """
    def get_discretize_command(discretizer):
        # NOTE: keep this function's name unchanged. The test in this module
        # fetches the command as 'test_discretize_command' (presumably the
        # decorator registers the command under the function's name).
        def test_discretize_command(data_manager, datapoints, attribute, nb_bins, new_column_name):
            output = discretizer.discretize(datapoints, attribute, nb_bins)
            data_manager.datapoints.add_column(output['result'], new_column_name)
        return test_discretize_command
    return get_discretize_command
46
47
48
@pytest.fixture
def validate_discretization_operation_behaviour():
    """Provide a checker for the outcome of an executed discretization command.

    The returned callable takes the executed command and the algorithm that
    ran, and asserts that the settings recorded in ``algorithm.output``
    agree with the arguments stored in ``cmd.args``.
    """
    def _validate_discretization_operation(cmd, algorithm):
        # cmd.args is read positionally: datapoints, target column, bin count.
        datapoints = cmd.args[0]
        target_column = cmd.args[1]
        nb_bins = cmd.args[2]

        column = datapoints.column(target_column)
        settings = algorithm.output['settings']

        # The expected bin edges are hard-coded for the data and the bin
        # count this module's test uses.
        assert list(settings['used_bins']) == [-0.1, 25.0, 50.0, 75.0, 100.0]

        # The algorithm must have received the whole column and the
        # requested number of bins.
        input_arguments = settings['arguments']
        to_check = [len(input_arguments[0]), input_arguments[1]]
        assert to_check == [len(datapoints), nb_bins]
        assert type(column) is type(input_arguments[0])
        assert list(column) == list(input_arguments[0])
        # NOTE: settings['parameters'] is deliberately not checked; the
        # original assertion (== []) was disabled.
    return _validate_discretization_operation
67
68
69
@pytest.fixture
def discretiztion_test_data(somagic, load_test_data_this):
    """Load the test data, rewrite the 'Creative' column and name test attributes.

    Empty-string entries of the 'Creative' column are replaced with ``0.0``
    and the column is written back through ``somagic.datapoints.add_column``.
    The fixture asserts that every value is exactly of type ``float`` both
    before and after writing back.

    Returns a dict with two lists of attribute names: ``'success'`` and
    ``'fail'``.
    """
    load_test_data_this(somagic)
    print('DATAPOINTS BEFORE', len(somagic.datapoints.attributes))
    print({type(value) for value in somagic.dataset.datapoints.column('Creative')})

    # inplace=False: work on a copy and write it back explicitly below.
    raw_column = somagic.dataset.datapoints.column('Creative')
    series = raw_column.replace('', 0.0, inplace=False)
    assert all(type(value) is float for value in series)
    print(type(series))
    print('MIN', min(series))
    print('MAX', max(series))

    somagic.datapoints.add_column(list(series), 'Creative')
    print('DATAPOINTS AFTER', len(somagic.datapoints.attributes))
    print({type(value) for value in somagic.dataset.datapoints.column('Creative')})

    stored_values = somagic.datapoints.observations['Creative']
    assert all(type(value) is float for value in stored_values)

    attribute_names = {'success': ['Creative'], 'fail': ['Energetic']}
    return attribute_names
94
95
96
def test_discretization_operation(
    somagic,
    data_manager_command_decorators,
    discretiztion_test_data,
    define_command,
    get_command,
    test_discretizer,
    discretize_command,
    validate_discretization_operation_behaviour,
):
    """Define the discretize command, then run it on 'success' and 'fail' attributes.

    For every attribute listed under ``'success'`` the command is executed
    and its outcome validated; for every attribute listed under ``'fail'``
    executing the command must raise ``TypeError``.
    """
    # NOTE(review): data_manager_command_decorators is requested but not
    # used; the decorator is read directly from somagic below.
    print('INFO: datapoints columns:', somagic.datapoints.attributes)
    decorator = somagic.commands_decorators.data_manager_command()
    command_function = discretize_command(test_discretizer)
    define_command(decorator, command_function)
    print('ELA',  {type(value) for value in somagic.dataset.datapoints.column('Creative')})

    def _prepared_command(attr_name):
        # Fetch the command and bind its arguments for one attribute.
        command = get_command('test_discretize_command')
        command.args = [somagic.datapoints, attr_name, 4, f'binned_{attr_name}']
        return command

    for attr_name in discretiztion_test_data['success']:
        cmd = _prepared_command(attr_name)
        cmd.execute()

        validate_discretization_operation_behaviour(cmd, test_discretizer.algorithm)

    for attr_name in discretiztion_test_data['fail']:
        cmd = _prepared_command(attr_name)
        with pytest.raises(TypeError):
            cmd.execute()
112