Passed
Pull Request — dev (#32)
by Konstantinos
04:53 queued 02:46
created

test_discretization   A

Complexity

Total Complexity 10

Size/Duplication

Total Lines 91
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 61
dl 0
loc 91
rs 10
c 0
b 0
f 0
wmc 10

9 Functions

Rating   Name   Duplication   Size   Complexity  
A define_command() 0 6 1
A test_discretizer() 0 8 1
A test_discretization_on_non_preprocessed_attribute() 0 3 2
A discretize_command() 0 8 1
A cmd_to_succeed() 0 7 1
A validate_discretization_operation_behaviour() 0 16 1
A test_discretization_operation() 0 3 1
A cmd_to_fail() 0 7 1
A discretization_cmd() 0 13 1
1
import pytest
2
3
4
@pytest.fixture
def define_command():
    """Provide a helper that applies a decorator to a command function.

    The returned callable invokes ``decorator(command_function)`` purely for
    its side effect (the decorator's return value is discarded; presumably the
    decorator is what registers the command with the engine) and returns the
    ``__name__`` of ``command_function`` so the caller can refer to it later.
    """
    def _apply_and_name(decorator, command_function):
        decorator(command_function)
        return command_function.__name__

    return _apply_and_name
10
11
12
@pytest.fixture
def test_discretizer():
    """Return a ``Discretizer`` built from the built-in ``'pd.cut'`` binning algorithm."""
    # Imported inside the fixture so the package is only loaded when needed.
    from so_magic.data.discretization import BinningAlgorithm, Discretizer

    return Discretizer.from_algorithm(BinningAlgorithm.from_built_in('pd.cut'))
20
21
22
@pytest.fixture
def discretize_command():
    """Provide a factory that builds a discretize command around a discretizer.

    The factory takes a discretizer and returns a function which discretizes
    ``attribute`` of ``datapoints`` into ``nb_bins`` bins and stores the
    ``'result'`` entry of the output as column ``new_column_name`` on
    ``data_manager.datapoints``.
    """
    def _command_factory(discretizer):
        # The inner function's name is deliberately kept: in this file its
        # ``__name__`` is what the command gets looked up by afterwards.
        def test_discretize_command(data_manager, datapoints, attribute, nb_bins, new_column_name):
            outcome = discretizer.discretize(datapoints, attribute, nb_bins)
            data_manager.datapoints.add_column(outcome['result'], new_column_name)

        return test_discretize_command

    return _command_factory
30
31
32
@pytest.fixture
def validate_discretization_operation_behaviour():
    """Provide a checker for the state an executed discretization leaves behind.

    The returned callable receives the executed command and the binning
    algorithm, and asserts that:

    * the bins recorded by the algorithm are ``[-0.1, 25.0, 50.0, 75.0, 100.0]``;
    * the recorded input arguments match the command's datapoints (by length)
      and requested number of bins;
    * the values handed to the algorithm are the target column of the
      datapoints (same type, same content).
    """
    def _check(cmd, algorithm):
        # Positional layout of the command arguments: datapoints, column, bins.
        datapoints = cmd.args[0]
        target_column = cmd.args[1]
        nb_bins = cmd.args[2]

        settings = algorithm.output['settings']
        assert list(settings['used_bins']) == [-0.1, 25.0, 50.0, 75.0, 100.0]

        input_arguments = settings['arguments']
        values_given, bins_given = input_arguments[0], input_arguments[1]
        assert [len(values_given), bins_given] == [len(datapoints), nb_bins]

        assert type(datapoints.column(target_column)) is type(values_given)
        assert list(datapoints.column(target_column)) == list(values_given)
        # assert algorithm.output['settings']['parameters'] == []

    return _check
48
49
50
@pytest.fixture
def discretization_cmd(somagic, test_datapoints, define_command, discretize_command, test_discretizer):
    """Get a discretization command after some 'pre-processing' done on the test datapoints.

    The 'Creative' column has its empty-string entries replaced with ``0.0``
    and is written back, so every value in it is exactly a ``float``. A
    discretize command is then defined through the data-manager command
    decorator and fetched from ``somagic.command`` by name.

    ``test_datapoints`` is not used directly; it is presumably requested so
    that the datapoints are loaded before this fixture runs.
    """
    creative = somagic.dataset.datapoints.column('Creative').replace('', 0.0, inplace=False)
    # Exact type check on purpose: float subclasses must not pass here.
    assert all(type(value) is float for value in creative)

    somagic.datapoints.add_column(list(creative), 'Creative')

    assert all(type(value) is float for value in somagic.datapoints.observations['Creative'])

    decorator = somagic.commands_decorators.data_manager_command()
    command_function = discretize_command(test_discretizer)
    command_name: str = define_command(decorator, command_function)
    return getattr(somagic.command, command_name)
63
64
65
@pytest.fixture(params=[
    ['Creative'],
    # [],  # add more columns when we know the discretization command will succeed for them
])
def cmd_to_succeed(request, test_datapoints, discretization_cmd):
    """Return the discretization command configured for a column expected to work.

    The command is set to discretize the parametrized column into 4 bins and
    to store the result in a column named ``binned_<column>``.
    """
    column = request.param[0]
    discretization_cmd.args = [test_datapoints, column, 4, f'binned_{column}']
    return discretization_cmd
72
73
74
def test_discretization_operation(cmd_to_succeed, test_discretizer, validate_discretization_operation_behaviour):
    """Execute the discretization command, then validate the algorithm's recorded output."""
    command = cmd_to_succeed
    command.execute()
    validate_discretization_operation_behaviour(command, test_discretizer.algorithm)
77
78
79
@pytest.fixture(params=[
    ['Energetic'],
    # [],  # add more columns when we know the discretization command will fail for them
])
def cmd_to_fail(request, test_datapoints, discretization_cmd):
    """Return the discretization command configured for a column expected to fail.

    The command is set to discretize the parametrized column into 4 bins and
    to store the result in a column named ``binned_<column>``.
    """
    column = request.param[0]
    discretization_cmd.args = [test_datapoints, column, 4, f'binned_{column}']
    return discretization_cmd
86
87
88
def test_discretization_on_non_preprocessed_attribute(cmd_to_fail):
    """Executing the command on the failing column must raise ``TypeError``.

    Presumably the column still holds non-float entries because it was not
    pre-processed like 'Creative' -- confirm against the test data.
    """
    command = cmd_to_fail
    with pytest.raises(TypeError):
        command.execute()
91