Passed
Pull Request — dev (#32)
by Konstantinos
04:02 queued 02:26
created

test_discretization_operation()   A

Complexity

Conditions 4

Size

Total Lines 16
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 14
nop 8
dl 0
loc 16
rs 9.7
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
import pytest
2
3
4
@pytest.fixture
def data_manager_command_decorators(somagic):
    """Expose two command decorators of ``somagic`` under short keys.

    Returns a dict with:
      * ``'data_manager_cmd'`` -> ``somagic.commands_decorators.data_manager_command``
      * ``'arbitrary_cmd'``    -> ``somagic.commands_decorators.arbitrary_command``
    """
    decorators = somagic.commands_decorators
    return dict(
        data_manager_cmd=decorators.data_manager_command,
        arbitrary_cmd=decorators.arbitrary_command,
    )
10
11
12
@pytest.fixture
def define_command():
    """Provide a callable that applies a decorator to a command function.

    The returned callable takes ``(decorator, command_function)``, invokes
    ``decorator(command_function)`` and returns ``None``.
    """
    def _apply_decorator(decorator, command_function):
        # Only the call itself matters here; whatever the decorator returns
        # is intentionally not propagated to the caller.
        decorator(command_function)

    return _apply_decorator
17
18
19
@pytest.fixture
def get_command(somagic):
    """Provide a lookup function for commands exposed on ``somagic.command``.

    The returned callable takes a command name and returns the attribute of
    that name found on ``somagic.command``.
    """
    def _lookup(command_name: str):
        registry = somagic.command
        return getattr(registry, command_name)

    return _lookup
24
25
26
@pytest.fixture
def test_discretizer():
    """Build a ``Discretizer`` backed by the built-in ``'pd.cut'`` binning algorithm."""
    # Imported inside the fixture, as in the original, so the package is only
    # loaded when a test actually requests this fixture.
    from so_magic.data.discretization import Discretizer, BinningAlgorithm

    return Discretizer.from_algorithm(BinningAlgorithm.from_built_in('pd.cut'))
34
35
36
@pytest.fixture
def discretize_command():
    """Provide a factory that turns a discretizer into a command function.

    The factory takes a discretizer and returns a function which discretizes
    one attribute of the given datapoints and stores the result as a new
    column on ``data_manager.datapoints``.
    """
    def _make_command(discretizer):
        # NOTE: the test in this file fetches the command under the name
        # 'test_discretize_command' -- presumably derived from this function's
        # name, so it must not be renamed (confirm against the command registry).
        def test_discretize_command(data_manager, datapoints, attribute, nb_bins, new_column_name):
            binned = discretizer.discretize(datapoints, attribute, nb_bins)['result']
            data_manager.datapoints.add_column(binned, new_column_name)

        return test_discretize_command

    return _make_command
44
45
46
@pytest.fixture
def validate_discretization_operation_behaviour():
    """Provide a checker for the state recorded by a discretization run.

    The returned callable takes:
      * ``cmd``: an executed command whose ``args`` start with
        ``(datapoints, target_column, nb_bins)``;
      * ``algorithm``: the algorithm object whose ``output['settings']``
        holds the ``'used_bins'`` and ``'arguments'`` of the run.

    It raises ``AssertionError`` when the recorded bins or the recorded input
    arguments do not match expectations.
    """
    def _validate_discretization_operation(cmd, algorithm):
        datapoints, target_column, nb_bins = cmd.args[0], cmd.args[1], cmd.args[2]
        # Look the column up once and reuse it for every check below.
        column = datapoints.column(target_column)
        settings = algorithm.output['settings']

        # The expected edges are hard-coded for the test data set
        # (presumably a 0-100 valued column split into 4 bins, with the
        # lowest edge widened slightly -- confirm against the fixture data).
        assert list(settings['used_bins']) == [-0.1, 25.0, 50.0, 75.0, 100.0]

        # The algorithm must have received the very column and bin count
        # that the command was configured with.
        input_arguments = settings['arguments']
        assert [len(input_arguments[0]), input_arguments[1]] == [len(datapoints), nb_bins]
        assert type(column) is type(input_arguments[0])
        assert list(column) == list(input_arguments[0])
        # TODO: the check below was left disabled by the original author.
        # assert algorithm.output['settings']['parameters'] == []

    return _validate_discretization_operation
65
66
67
@pytest.fixture
def discretiztion_test_data(somagic, load_test_data_this):
    """Load the test data, normalise the 'Creative' column and name the test attributes.

    After loading, empty-string entries of the 'Creative' column are replaced
    with ``0.0`` and the cleaned values are written back under the same
    column name. The fixture asserts that every value is exactly of type
    ``float`` both before and after the write-back, and prints some
    diagnostics along the way.

    Returns a dict with the attribute names expected to discretize
    successfully (``'success'``) and those expected to fail (``'fail'``).
    """
    column_name = 'Creative'

    def _value_types():
        # Distinct Python types currently stored in the column (diagnostic only).
        return {type(value) for value in somagic.dataset.datapoints.column(column_name)}

    load_test_data_this(somagic)
    print('DATAPOINTS BEFORE', len(somagic.datapoints.attributes))
    print(_value_types())

    # presumably a pandas Series -- `replace(..., inplace=False)` yields a new object
    cleaned = somagic.dataset.datapoints.column(column_name).replace('', 0.0, inplace=False)
    # Exact type comparison on purpose: subclasses of float are rejected as well.
    assert all(type(value) is float for value in cleaned)
    print(type(cleaned))
    print('MIN', min(cleaned))
    print('MAX', max(cleaned))

    somagic.datapoints.add_column(list(cleaned), column_name)
    print('DATAPOINTS AFTER', len(somagic.datapoints.attributes))
    print(_value_types())

    stored_values = somagic.datapoints.observations[column_name]
    assert all(type(value) is float for value in stored_values)

    return dict(success=[column_name], fail=['Energetic'])
92
93
94
def test_discretization_operation(
    somagic,
    data_manager_command_decorators,
    discretiztion_test_data,
    define_command,
    get_command,
    test_discretizer,
    discretize_command,
    validate_discretization_operation_behaviour,
):
    """Register a discretization command and exercise it on good and bad attributes.

    For every attribute listed under ``'success'`` the command is executed and
    the recorded algorithm state is validated; for every attribute listed
    under ``'fail'`` executing the command must raise ``TypeError``.

    Note: ``data_manager_command_decorators`` is requested as a fixture but
    not referenced in the body.
    """
    bin_count = 4

    def _prepare_command(attribute):
        # Fetch the registered command and bind its arguments for `attribute`.
        command = get_command('test_discretize_command')
        command.args = [somagic.datapoints, attribute, bin_count, f'binned_{attribute}']
        return command

    print('INFO: datapoints columns:', somagic.datapoints.attributes)
    define_command(somagic.commands_decorators.data_manager_command(), discretize_command(test_discretizer))
    print('ELA',  {type(value) for value in somagic.dataset.datapoints.column('Creative')})

    for attribute in discretiztion_test_data['success']:
        command = _prepare_command(attribute)
        command.execute()
        validate_discretization_operation_behaviour(command, test_discretizer.algorithm)

    for attribute in discretiztion_test_data['fail']:
        command = _prepare_command(attribute)
        with pytest.raises(TypeError):
            command.execute()
110