Passed
Pull Request — dev (#32)
by Konstantinos
05:57 queued 03:46
created

test_discretization_operation()   A

Complexity

Conditions 4

Size

Total Lines 16
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 14
nop 8
dl 0
loc 16
rs 9.7
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
import pytest
2
3
4
@pytest.fixture
def data_manager_command_decorators(somagic):
    """Expose the command-decorator attributes of ``somagic`` under short keys.

    Returns:
        dict: ``'data_manager_cmd'`` maps to
        ``somagic.commands_decorators.data_manager_command`` and
        ``'arbitrary_cmd'`` maps to
        ``somagic.commands_decorators.arbitrary_command``.
    """
    decorators = somagic.commands_decorators
    return {
        'data_manager_cmd': decorators.data_manager_command,
        'arbitrary_cmd': decorators.arbitrary_command,
    }
10
11
12
@pytest.fixture
def define_command():
    """Provide a callable that applies a decorator to a command function.

    Returns:
        A function ``(decorator, command_function)`` that calls
        ``decorator(command_function)`` and discards the result.
    """
    def _apply_decorator(decorator, command_function):
        # Only the call itself matters; its return value is not used.
        # Presumably the decorator registers the command as a side effect
        # -- TODO confirm against the decorator implementation.
        decorator(command_function)

    return _apply_decorator
17
18
19
@pytest.fixture
def get_command(somagic):
    """Provide a lookup of commands by name on ``somagic.command``.

    Returns:
        A function taking a command name and returning the attribute of that
        name found on ``somagic.command``.
    """
    def _lookup_command(command_name: str):
        # ``somagic.command`` is read on every call, not once at fixture setup.
        command_namespace = somagic.command
        return getattr(command_namespace, command_name)

    return _lookup_command
24
25
26
@pytest.fixture
def test_discretizer():
    """Build a ``Discretizer`` backed by the built-in ``'pd.cut'`` algorithm."""
    from so_magic.data.discretization import Discretizer, BinningAlgorithm

    return Discretizer.from_algorithm(BinningAlgorithm.from_built_in('pd.cut'))
34
35
36
@pytest.fixture
def discretize_command():
    """Provide a factory that builds a discretization command function.

    Returns:
        A function taking a ``discretizer`` and returning a command function
        ``(data_manager, datapoints, attribute, nb_bins, new_column_name)``.
        That command discretizes ``attribute`` of ``datapoints`` into
        ``nb_bins`` bins and stores the ``'result'`` entry of the discretizer
        output as column ``new_column_name`` on ``data_manager.datapoints``.
    """
    def get_discretize_command(discretizer):
        # The inner function keeps its original name on purpose: the test
        # fetches the command as 'test_discretize_command', so the name is
        # presumably what the command gets registered under.
        def test_discretize_command(data_manager, datapoints, attribute, nb_bins, new_column_name):
            output = discretizer.discretize(datapoints, attribute, nb_bins)
            data_manager.datapoints.add_column(output['result'], new_column_name)
        return test_discretize_command

    return get_discretize_command
46
47
48
@pytest.fixture
def validate_discretization_operation_behaviour():
    """Provide a checker for the settings recorded by a discretization run.

    Returns:
        A function ``(cmd, algorithm)`` that reads the datapoints, target
        column name and number of bins from ``cmd.args[0:3]`` and asserts,
        against ``algorithm.output['settings']``, that:

        * ``'used_bins'`` equals the expected bin edges,
        * ``'arguments'`` holds a first argument of the same length as the
          datapoints and a second argument equal to the number of bins,
        * the first argument has the same type and the same values as the
          target column.

    Raises:
        AssertionError: when any of the checks above fails.
    """
    def _validate_discretization_operation(cmd, algorithm):
        datapoints = cmd.args[0]
        target_column = cmd.args[1]
        nb_bins = cmd.args[2]

        column = datapoints.column(target_column)
        settings = algorithm.output['settings']

        # The expected edges are hard-coded. Presumably they correspond to a
        # column spanning 0..100 cut into 4 bins, with the lowest edge widened
        # by the binning algorithm -- TODO confirm against the test data.
        assert list(settings['used_bins']) == [-0.1, 25.0, 50.0, 75.0, 100.0]

        input_arguments = settings['arguments']
        assert [len(input_arguments[0]), input_arguments[1]] == [len(datapoints), nb_bins]
        assert type(column) is type(input_arguments[0])
        assert list(column) == list(input_arguments[0])
        # The 'parameters' entry of the settings is not verified here.

    return _validate_discretization_operation
67
68
69
@pytest.fixture
def discretiztion_test_data(somagic, load_test_data_this):
    """Load the test data and normalise the 'Creative' column to floats.

    After loading, empty-string entries of the 'Creative' column are replaced
    with ``0.0`` and the resulting values are written back as column
    'Creative'. Diagnostic information is printed before and after.

    Returns:
        dict: attribute names under ``'success'`` and under ``'fail'``; the
        test expects discretizing the latter to raise.

    Raises:
        AssertionError: when a 'Creative' value is not exactly of type
            ``float``, either in the replaced series or after writing it back.
    """
    def _creative_value_types():
        # Distinct Python types currently found in the 'Creative' column.
        return {type(value) for value in somagic.dataset.datapoints.column('Creative')}

    load_test_data_this(somagic)
    print('DATAPOINTS BEFORE', len(somagic.datapoints.attributes))
    print(_creative_value_types())

    creative = somagic.dataset.datapoints.column('Creative')
    series = creative.replace('', 0.0, inplace=False)
    assert all(type(value) == float for value in series)
    print(type(series))
    print('MIN', min(series))
    print('MAX', max(series))

    somagic.datapoints.add_column(list(series), 'Creative')
    print('DATAPOINTS AFTER', len(somagic.datapoints.attributes))
    print(_creative_value_types())

    stored_values = somagic.datapoints.observations['Creative']
    assert all(type(value) == float for value in stored_values)

    return {
        'success': ['Creative'],
        'fail': ['Energetic'],
    }
94
95
96
def test_discretization_operation(
    somagic,
    data_manager_command_decorators,
    discretiztion_test_data,
    define_command,
    get_command,
    test_discretizer,
    discretize_command,
    validate_discretization_operation_behaviour,
):
    """Check the discretize command on valid and invalid attributes.

    Defines the discretize command through the data-manager command decorator.
    For every attribute listed under ``'success'`` the command is executed
    with 4 bins and the recorded algorithm settings are validated. For every
    attribute listed under ``'fail'`` execution must raise ``TypeError``.
    """
    def _prepare_command(attr_name):
        # Fetch the command and bind its arguments for one attribute.
        cmd = get_command('test_discretize_command')
        cmd.args = [somagic.datapoints, attr_name, 4, f'binned_{attr_name}']
        return cmd

    print('INFO: datapoints columns:', somagic.datapoints.attributes)
    # Use the injected fixture instead of reaching into ``somagic`` again; it
    # maps 'data_manager_cmd' to somagic.commands_decorators.data_manager_command.
    data_manager_decorator = data_manager_command_decorators['data_manager_cmd']()
    define_command(data_manager_decorator, discretize_command(test_discretizer))
    print('ELA',  set(type(x) for x in somagic.dataset.datapoints.column('Creative')))

    for attr_name in discretiztion_test_data['success']:
        cmd = _prepare_command(attr_name)
        cmd.execute()
        validate_discretization_operation_behaviour(cmd, test_discretizer.algorithm)

    for attr_name in discretiztion_test_data['fail']:
        cmd = _prepare_command(attr_name)
        with pytest.raises(TypeError):
            cmd.execute()
112