GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

tests.test_util.Test__missing_vals.test_mv_rows()   A
last analyzed

Complexity

Conditions 2

Size

Total Lines 5
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 4
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
import unittest
2
3
import numpy as np
4
import pandas as pd
5
import pytest
6
7
from klib.utils import _corr_selector
8
from klib.utils import _drop_duplicates
9
from klib.utils import _missing_vals
10
from klib.utils import _validate_input_bool
11
from klib.utils import _validate_input_int
12
from klib.utils import _validate_input_num_data
13
from klib.utils import _validate_input_range
14
from klib.utils import _validate_input_smaller
15
from klib.utils import _validate_input_sum_larger
16
from klib.utils import _validate_input_sum_smaller
17
18
19
class Test__corr_selector(unittest.TestCase):
    """Exercise ``_corr_selector`` on a correlation matrix and on column/target correlations."""

    @classmethod
    def setUpClass(cls) -> None:
        """Build the shared fixture: a 6x6 integer frame and a 6-element target series."""
        rows = [
            [1, 7, 2, 2, 4, 7],
            [3, 8, 3, 3, 7, 1],
            [5, 7, 9, 5, 1, 4],
            [1, 7, 8, 6, 1, 8],
            [1, 7, 5, 6, 2, 6],
            [2, 7, 3, 3, 5, 3],
        ]
        cls.df_data_corr = pd.DataFrame(rows)

        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        """Check the shape and the NaN count of the selected correlation matrix."""
        # Without split/threshold the full 6x6 matrix shape is expected.
        assert _corr_selector(self.df_data_corr.corr()).shape == (6, 6)

        # (keyword arguments for _corr_selector, expected total number of NaN cells)
        expectations = (
            ({"split": "pos"}, 18),
            ({"split": "pos", "threshold": 0.5}, 26),
            ({"split": "neg", "threshold": -0.75}, 32),
            ({"split": "high", "threshold": 0.15}, 4),
            ({"split": "low", "threshold": 0.85}, 6),
        )
        for options, nan_total in expectations:
            # The correlation matrix is recomputed for every case so each call
            # receives a fresh input object.
            selected = _corr_selector(self.df_data_corr.corr(), **options)
            assert selected.isna().sum().sum() == nan_total

    def test__corr_selector_label(self):
        """Check the shape and the NaN count when correlating each column with the target."""
        assert _corr_selector(self.df_data_corr.corrwith(self.target)).shape == (6,)

        # (keyword arguments for _corr_selector, expected number of NaN entries)
        expectations = (
            ({"split": "pos"}, 3),
            ({"split": "pos", "threshold": 0.8}, 4),
            ({"split": "neg", "threshold": -0.7}, 5),
            ({"split": "high", "threshold": 0.2}, 1),
            ({"split": "low", "threshold": 0.8}, 2),
        )
        for options, nan_total in expectations:
            selected = _corr_selector(self.df_data_corr.corrwith(self.target), **options)
            assert selected.isna().sum() == nan_total
106
107
108
class Test__drop_duplicates(unittest.TestCase):
    """Compare ``_drop_duplicates`` against pandas' own ``drop_duplicates``."""

    @classmethod
    def setUpClass(cls) -> None:
        """Build a 7x4 frame containing repeated rows, including two all-NA rows."""
        all_missing = [pd.NA, pd.NA, pd.NA, pd.NA]
        cls.data_dupl_df = pd.DataFrame(
            [
                list(all_missing),
                [1, 2, 3, 4],
                [1, 2, 3, 4],
                [1, 2, 3, 4],
                [2, 3, 4, 5],
                [1, 2, 3, pd.NA],
                list(all_missing),
            ],
        )

    def test__drop_dupl(self) -> None:
        """Check the deduplicated frame and the size of the second return element."""
        # Four distinct rows are expected to remain.
        deduplicated = _drop_duplicates(self.data_dupl_df)[0]
        assert deduplicated.shape == (4, 4)

        # The result must equal what pandas produces after re-indexing.
        candidate = _drop_duplicates(self.data_dupl_df)[0]
        reference = self.data_dupl_df.drop_duplicates().reset_index(drop=True)
        assert candidate.equals(reference)

        # The second element is expected to hold one entry per dropped duplicate row.
        dropped = _drop_duplicates(self.data_dupl_df)[1]
        assert len(dropped) == 3
132
133
134
class Test__missing_vals(unittest.TestCase):
    """Tests for the ``mv_*`` entries of the result returned by ``_missing_vals``.

    The fixture is a 4x4 table holding five missing markers in total
    (``np.nan``, ``None`` twice, ``pd.NA`` and ``pd.NaT``).
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Provide the same data as a nested list, a DataFrame and a NumPy array."""
        cls.data_mv_list = [
            [1, np.nan, 3, 4],
            [None, 4, 5, None],
            ["a", "b", pd.NA, "d"],
            [True, False, 7, pd.NaT],
        ]

        cls.data_mv_df = pd.DataFrame(cls.data_mv_list)

        cls.data_mv_array = np.array(cls.data_mv_list)

    def test_mv_total(self) -> None:
        """The total count must be 5 regardless of the input container type."""
        assert _missing_vals(self.data_mv_df)["mv_total"] == 5
        assert _missing_vals(self.data_mv_array)["mv_total"] == 5
        assert _missing_vals(self.data_mv_list)["mv_total"] == 5

    def test_mv_rows(self) -> None:
        """Missing values counted per row."""
        expected_results = [1, 2, 1, 1]
        # Compute once; the input does not change between comparisons.
        mv_rows = _missing_vals(self.data_mv_df)["mv_rows"]
        for i, result in enumerate(expected_results):
            assert mv_rows[i] == result

    def test_mv_cols(self) -> None:
        """Missing values counted per column."""
        expected_results = [1, 1, 1, 2]
        mv_cols = _missing_vals(self.data_mv_df)["mv_cols"]
        for i, result in enumerate(expected_results):
            assert mv_cols[i] == result

    def test_mv_rows_ratio(self) -> None:
        """Ratio of missing values per row, plus a 0..1 range check for every row."""
        expected_results = [0.25, 0.5, 0.25, 0.25]
        mv_rows_ratio = _missing_vals(self.data_mv_df)["mv_rows_ratio"]
        for i, result in enumerate(expected_results):
            assert mv_rows_ratio[i] == result

        # Iterate over the number of ROWS. Iterating the DataFrame itself yields
        # column labels, which only matched by accident because the fixture is square.
        for i in range(len(self.data_mv_df)):
            assert 0 <= mv_rows_ratio[i] <= 1

    def test_mv_cols_ratio(self) -> None:
        """Ratio of missing values per column, plus a 0..1 range check for every column."""
        expected_results = [0.25, 0.25, 0.25, 0.5]
        mv_cols_ratio = _missing_vals(self.data_mv_df)["mv_cols_ratio"]
        for i, result in enumerate(expected_results):
            assert mv_cols_ratio[i] == result

        # Iterate explicitly over the number of columns.
        for i in range(self.data_mv_df.shape[1]):
            assert 0 <= mv_cols_ratio[i] <= 1
185
186
187
class Test__validate_input(unittest.TestCase):
    """Verify that the ``_validate_input_*`` helpers reject invalid arguments."""

    def test__validate_input_bool(self) -> None:
        """Non-boolean inputs must raise ``TypeError``."""
        for not_a_bool in ("True", None, 1):
            with pytest.raises(TypeError):
                _validate_input_bool(not_a_bool, "No description")

    def test__validate_input_int(self) -> None:
        """Non-integer inputs must raise ``TypeError``."""
        for not_an_int in (1.1, [1], "1"):
            with pytest.raises(TypeError):
                _validate_input_int(not_an_int, "No description")

    def test__validate_input_smaller(self) -> None:
        """A first value larger than the second must raise ``ValueError``."""
        pattern = "The first input for 'some check' should"
        for first, second in ((0.3, 0.2), (3, 2), (5, -3)):
            with pytest.raises(ValueError, match=pattern):
                _validate_input_smaller(first, second, "some check")

    def test__validate_input_range(self) -> None:
        """Values outside [0, 1] raise ``ValueError``; a string raises ``TypeError``."""
        # (value passed in, message pattern expected from the ValueError)
        out_of_range = (
            (-0.1, "'actual' = -0.1 but should be 0 <= 'actual' <= 1."),
            (1.1, "'actual' = 1.1 but should be 0 <= 'actual' <= 1."),
        )
        for value, pattern in out_of_range:
            with pytest.raises(ValueError, match=pattern):
                _validate_input_range(value, "actual", 0, 1)

        with pytest.raises(TypeError):
            _validate_input_range("1", "value string", 0, 1)

    def test__validate_input_sum_smaller(self) -> None:
        """Value sets whose sum exceeds the limit must raise ``ValueError``."""
        # (limit, description, values to sum, expected message pattern)
        cases = (
            (
                1,
                "Test Sum <= 1",
                (1.01,),
                "The sum of input values for 'Test Sum <= 1' should be less or equal to 1.",
            ),
            (
                1,
                "Test Sum <= 1",
                (0.3, 0.2, 0.4, 0.5),
                "The sum of input values for 'Test Sum <= 1' should be less or equal to 1.",
            ),
            (
                -1,
                "Test Sum <= -1",
                (-0.2, -0.7),
                "The sum of input values for 'Test Sum <= -1' should be less or equal to -1.",
            ),
            (
                10,
                "Test Sum <= 10",
                (20, -11, 2),
                "The sum of input values for 'Test Sum <= 10' should be less or equal to 10.",
            ),
        )
        for limit, description, values, pattern in cases:
            with pytest.raises(ValueError, match=pattern):
                _validate_input_sum_smaller(limit, description, *values)

    def test__validate_input_sum_larger(self) -> None:
        """Value sets whose sum falls below the limit must raise ``ValueError``."""
        # (limit, description, values to sum, expected message pattern)
        cases = (
            (
                1,
                "Test Sum >= 1",
                (0.99,),
                "The sum of input values for 'Test Sum >= 1' should be larger/equal to 1.",
            ),
            (
                1,
                "Test Sum >= 1",
                (0.9, 0.05),
                "The sum of input values for 'Test Sum >= 1' should be larger/equal to 1.",
            ),
            (
                -2,
                "Test Sum >=-2",
                (-3,),
                "The sum of input values for 'Test Sum >=-2' should be larger/equal to -2.",
            ),
            (
                7,
                "Test Sum >= 7",
                (1, 2, 3),
                "The sum of input values for 'Test Sum >= 7' should be larger/equal to 7.",
            ),
        )
        for limit, description, values, pattern in cases:
            with pytest.raises(ValueError, match=pattern):
                _validate_input_sum_larger(limit, description, *values)

    def test__validate_input_num_data(self) -> None:
        """A frame with only string data raises ``TypeError``; numeric data passes."""
        with pytest.raises(TypeError):
            text_only = pd.DataFrame({"col1": ["a", "b", "c"]})
            _validate_input_num_data(text_only, "No description")

        numeric_only = pd.DataFrame({"col1": [1, 2, 3]})
        _validate_input_num_data(numeric_only, "No description")  # No exception
286