Passed
Pull Request — dev (#32)
by Konstantinos
03:03 queued 01:41
created

test_discretization   A

Complexity

Total Complexity 10

Size/Duplication

Total Lines 93
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 66
dl 0
loc 93
rs 10
c 0
b 0
f 0
wmc 10

7 Functions

Rating   Name   Duplication   Size   Complexity  
A test_discretizer() 0 8 1
A discretization_test_data() 0 16 1
A get_command() 0 5 1
A define_command() 0 5 1
A discretize_command() 0 8 1
A validate_discretization_operation_behaviour() 0 19 1
A test_discretization_operation() 0 14 4
1
import pytest
2
3
4
@pytest.fixture
def define_command():
    """Provide a helper that applies a command decorator to a function.

    Returns:
        A callable ``(decorator, command_function)`` that invokes
        ``decorator(command_function)`` and discards the result, so the
        helper itself always returns ``None``.
    """
    def _apply_decorator(decorator, command_function):
        # Called only for its side effect (presumably registering the
        # command with the engine -- confirm against the decorator).
        decorator(command_function)

    return _apply_decorator
9
10
11
@pytest.fixture
def get_command(somagic):
    """Provide a helper that fetches a command object by its name.

    Args:
        somagic: fixture object exposing a ``command`` attribute.

    Returns:
        A callable taking ``command_name`` and returning the attribute of
        that name found on ``somagic.command``.
    """
    def _lookup_command(command_name: str):
        # Resolve ``somagic.command`` on every call rather than once at
        # fixture setup, exactly as the lookup has always been done.
        command_source = somagic.command
        return getattr(command_source, command_name)

    return _lookup_command
16
17
18
@pytest.fixture
def test_discretizer():
    """Build a Discretizer backed by the built-in 'pd.cut' binning algorithm.

    Returns:
        The object produced by ``Discretizer.from_algorithm`` when given the
        algorithm returned by ``BinningAlgorithm.from_built_in('pd.cut')``.
    """
    # Imported lazily so the package is only loaded when the fixture is used.
    from so_magic.data.discretization import BinningAlgorithm, Discretizer

    return Discretizer.from_algorithm(BinningAlgorithm.from_built_in('pd.cut'))
26
27
28
@pytest.fixture
def discretize_command():
    """Provide a factory of discretization command functions.

    Returns:
        A callable that takes a ``discretizer`` and returns a function
        ``test_discretize_command(data_manager, datapoints, attribute,
        nb_bins, new_column_name)``. That function discretizes ``attribute``
        of ``datapoints`` into ``nb_bins`` bins and stores the ``'result'``
        entry of the output as column ``new_column_name`` on
        ``data_manager.datapoints``.
    """
    def _make_discretize_command(discretizer):
        # NOTE: keep this function's name. The test fetches a command called
        # 'test_discretize_command'; presumably the command is registered
        # under the function's __name__ -- confirm against the decorator.
        def test_discretize_command(data_manager, datapoints, attribute, nb_bins, new_column_name):
            discretization = discretizer.discretize(datapoints, attribute, nb_bins)
            destination = data_manager.datapoints
            destination.add_column(discretization['result'], new_column_name)

        return test_discretize_command

    return _make_discretize_command
36
37
38
@pytest.fixture
def validate_discretization_operation_behaviour():
    """Provide a checker for the state left behind by a discretization command.

    Returns:
        A callable ``(cmd, algorithm)`` that asserts on
        ``algorithm.output['settings']``:

        * ``'used_bins'`` matches the expected bin edges;
        * ``'arguments'`` holds the discretized column (same type, same
          values, same length as the datapoints) followed by the number of
          bins, both as found in ``cmd.args``.

    Raises:
        AssertionError: (from the returned callable) when any check fails.
    """
    def _validate_discretization_operation(cmd, algorithm):
        # cmd.args layout: [datapoints, attribute name, nb_bins, ...]
        datapoints = cmd.args[0]
        target_column = cmd.args[1]
        nb_bins = cmd.args[2]

        settings = algorithm.output['settings']
        column_values = datapoints.column(target_column)

        # Expected edges are hard-coded for the test data (presumably a
        # 0..100 column cut into 4 bins -- confirm against the dataset).
        # Compared approximately because the edges are computed floats.
        expected_bins = [-0.1, 25.0, 50.0, 75.0, 100.0]
        assert list(settings['used_bins']) == pytest.approx(expected_bins)

        input_arguments = settings['arguments']
        assert [len(input_arguments[0]), input_arguments[1]] == [len(datapoints), nb_bins]
        # The algorithm must have received the column object as-is: exact
        # same type and the same sequence of values.
        assert type(column_values) is type(input_arguments[0])
        assert list(column_values) == list(input_arguments[0])
        # TODO(review): a check that settings['parameters'] == [] was left
        # disabled here; re-enable it once the expected value is confirmed.

    return _validate_discretization_operation
57
58
59
@pytest.fixture
def discretization_test_data(somagic, test_datapoints):
    """Prepare the 'Creative' column for discretization and name the test attributes.

    Empty-string entries of the 'Creative' column are replaced with ``0.0``
    and the cleaned values are written back as the 'Creative' column of
    ``somagic.datapoints``.

    Args:
        somagic: fixture object exposing ``dataset`` and ``datapoints``.
        test_datapoints: not used directly; requested only so that fixture
            runs first (presumably it loads the datapoints -- confirm).

    Returns:
        A dict with the attribute names expected to discretize successfully
        (``'success'``) and those expected to fail (``'fail'``).

    Raises:
        AssertionError: when a 'Creative' value is not exactly a ``float``,
            either before or after the column is written back.
    """
    series = somagic.dataset.datapoints.column('Creative').replace('', 0.0, inplace=False)
    assert all(type(x) is float for x in series), (
        "non-float values in cleaned 'Creative' column: "
        f"{sorted({type(x).__name__ for x in series})}"
    )

    somagic.datapoints.add_column(list(series), 'Creative')

    stored_values = somagic.datapoints.observations['Creative']
    assert all(type(x) is float for x in stored_values), (
        "non-float values in stored 'Creative' column: "
        f"{sorted({type(x).__name__ for x in stored_values})}"
    )

    return {
        'success': [
            'Creative',
        ],
        'fail': [
            'Energetic',
        ],
    }
77
78
79
def test_discretization_operation(
    somagic,
    discretization_test_data,
    define_command,
    get_command,
    test_discretizer,
    discretize_command,
    validate_discretization_operation_behaviour,
):
    """Run the discretize command on attributes expected to succeed and to fail.

    The command built from ``test_discretizer`` is first defined through the
    data-manager command decorator. Each 'success' attribute is then
    discretized into 4 bins and validated; each 'fail' attribute must make
    ``execute()`` raise ``TypeError``.
    """
    def _prepared_command(attr_name):
        # Fetch the command and set its arguments; execution is left to the
        # caller so the failure case can wrap only execute() in raises().
        command = get_command('test_discretize_command')
        command.args = [somagic.datapoints, attr_name, 4, f'binned_{attr_name}']
        return command

    decorator = somagic.commands_decorators.data_manager_command()
    command_function = discretize_command(test_discretizer)
    define_command(decorator, command_function)

    for attr_name in discretization_test_data['success']:
        cmd = _prepared_command(attr_name)
        cmd.execute()

        validate_discretization_operation_behaviour(cmd, test_discretizer.algorithm)

    for attr_name in discretization_test_data['fail']:
        cmd = _prepared_command(attr_name)
        with pytest.raises(TypeError):
            cmd.execute()
93