Test Failed
Pull Request — main (#17) by Andreas, created 02:08

tests.test_util (Rating: B)

Complexity

Total Complexity 43

Size/Duplication

Total Lines 259
Duplicated Lines 0 %

Importance

Changes 0
Metric  Value
eloc    190
dl      0
loc     259
rs      8.96
c       0
b       0
f       0
wmc     43

17 Methods

Rating   Name   Duplication   Size   Complexity  
A Test__corr_selector.setUpClass() 0 14 1
A Test__missing_vals.test_mv_rows_ratio() 0 12 3
A Test__validate_input.test__validate_input_int() 0 8 4
A Test__missing_vals.test_mv_total() 0 5 1
A Test__missing_vals.test_mv_cols_ratio() 0 12 3
A Test__corr_selector.test__corr_selector_label() 0 41 1
A Test__missing_vals.test_mv_cols() 0 5 2
A Test__missing_vals.setUpClass() 0 12 1
A Test__validate_input.test__validate_input_bool() 0 8 4
A Test__validate_input.test__validate_input_sum_smaller() 0 9 5
A Test__validate_input.test__validate_input_range() 0 9 4
A Test__validate_input.test__validate_input_sum_larger() 0 9 5
A Test__missing_vals.test_mv_rows() 0 5 2
A Test__drop_duplicates.test__drop_dupl() 0 11 1
A Test__validate_input.test__validate_input_smaller() 0 8 4
A Test__corr_selector.test__corr_selector_matrix() 0 32 1
A Test__drop_duplicates.setUpClass() 0 11 1
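
As a quick consistency check, the per-method complexities listed above (1 + 3 + 4 + 1 + 3 + 1 + 2 + 1 + 4 + 5 + 4 + 5 + 2 + 1 + 4 + 1 + 1) sum to 43, matching the reported Total Complexity and wmc value for the module.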

How to fix complexity

Complex classes like tests.test_util often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.

Once you have determined the fields and methods that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often the faster option. A sketch of the first approach is given below.
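
For illustration only (the target module name below is hypothetical and not part of this pull request), the _validate_input_* tests form one such cohesive group and could be moved into their own test module:

# Hypothetical sketch: move the validation tests out of tests.test_util into a
# separate module, e.g. tests/test_util_validation.py, so each file covers one
# cohesive concern and the per-module complexity drops.
import unittest

from klib.utils import _validate_input_bool, _validate_input_int


class Test__validate_input(unittest.TestCase):
    def test__validate_input_bool(self):
        # Non-boolean inputs should raise a TypeError
        with self.assertRaises(TypeError):
            _validate_input_bool("True", None)

    def test__validate_input_int(self):
        # Non-integer inputs should raise a TypeError
        with self.assertRaises(TypeError):
            _validate_input_int(1.1, None)


if __name__ == "__main__":
    unittest.main()

Splitting the module this way reduces the weighted method count of tests.test_util without changing any test behaviour. The full source of tests.test_util, as analyzed, follows.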

import unittest

import numpy as np
import pandas as pd

from klib.utils import (
    _corr_selector,
    _drop_duplicates,
    _missing_vals,
    _validate_input_bool,
    _validate_input_int,
    _validate_input_range,
    _validate_input_smaller,
    _validate_input_sum_larger,
    _validate_input_sum_smaller,
)


class Test__corr_selector(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # 6x6 numeric DataFrame and a target Series used for the correlation tests
        cls.df_data_corr = pd.DataFrame(
            [
                [1, 7, 2, 2, 4, 7],
                [3, 8, 3, 3, 7, 1],
                [5, 7, 9, 5, 1, 4],
                [1, 7, 8, 6, 1, 8],
                [1, 7, 5, 6, 2, 6],
                [2, 7, 3, 3, 5, 3],
            ]
        )

        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        # Selection on a full correlation matrix for the different split settings
        self.assertEqual(_corr_selector(self.df_data_corr.corr()).shape, (6, 6))
        self.assertEqual(
            _corr_selector(self.df_data_corr.corr(), split="pos").isna().sum().sum(), 18
        )
        self.assertEqual(
            _corr_selector(self.df_data_corr.corr(), split="pos", threshold=0.5)
            .isna()
            .sum()
            .sum(),
            26,
        )
        self.assertEqual(
            _corr_selector(self.df_data_corr.corr(), split="neg", threshold=-0.75)
            .isna()
            .sum()
            .sum(),
            32,
        )
        self.assertEqual(
            _corr_selector(self.df_data_corr.corr(), split="high", threshold=0.15)
            .isna()
            .sum()
            .sum(),
            4,
        )
        self.assertEqual(
            _corr_selector(self.df_data_corr.corr(), split="low", threshold=0.85)
            .isna()
            .sum()
            .sum(),
            6,
        )

    def test__corr_selector_label(self):
        # Selection on correlations with a target Series
        self.assertEqual(
            _corr_selector(self.df_data_corr.corrwith(self.target)).shape, (6,)
        )
        self.assertEqual(
            _corr_selector(self.df_data_corr.corrwith(self.target), split="pos")
            .isna()
            .sum(),
            3,
        )
        self.assertEqual(
            _corr_selector(
                self.df_data_corr.corrwith(self.target), split="pos", threshold=0.8
            )
            .isna()
            .sum(),
            4,
        )
        self.assertEqual(
            _corr_selector(
                self.df_data_corr.corrwith(self.target), split="neg", threshold=-0.7
            )
            .isna()
            .sum(),
            5,
        )
        self.assertEqual(
            _corr_selector(
                self.df_data_corr.corrwith(self.target), split="high", threshold=0.2
            )
            .isna()
            .sum(),
            1,
        )
        self.assertEqual(
            _corr_selector(
                self.df_data_corr.corrwith(self.target), split="low", threshold=0.8
            )
            .isna()
            .sum(),
            2,
        )


class Test__drop_duplicates(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # DataFrame containing duplicated rows and rows with missing values
        cls.data_dupl_df = pd.DataFrame(
            [
                [pd.NA, pd.NA, pd.NA, pd.NA],
                [1, 2, 3, 4],
                [1, 2, 3, 4],
                [1, 2, 3, 4],
                [2, 3, 4, 5],
                [1, 2, 3, pd.NA],
                [pd.NA, pd.NA, pd.NA, pd.NA],
            ]
        )

    def test__drop_dupl(self):
        # Test dropping of duplicate rows
        self.assertAlmostEqual(_drop_duplicates(self.data_dupl_df)[0].shape, (4, 4))
        # Test if the resulting DataFrame is equal to using the pandas method
        self.assertTrue(
            _drop_duplicates(self.data_dupl_df)[0].equals(
                self.data_dupl_df.drop_duplicates().reset_index(drop=True)
            )
        )
        # Test number of duplicates
        self.assertEqual(len(_drop_duplicates(self.data_dupl_df)[1]), 3)


class Test__missing_vals(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # The same missing-value data as a list, a DataFrame, and a NumPy array
        cls.data_mv_list = [
            [1, np.nan, 3, 4],
            [None, 4, 5, None],
            ["a", "b", pd.NA, "d"],
            [True, False, 7, pd.NaT],
        ]

        cls.data_mv_df = pd.DataFrame(cls.data_mv_list)

        cls.data_mv_array = np.array(cls.data_mv_list)

    def test_mv_total(self):
        # Test total missing values
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)["mv_total"], 5)
        self.assertAlmostEqual(_missing_vals(self.data_mv_array)["mv_total"], 5)
        self.assertAlmostEqual(_missing_vals(self.data_mv_list)["mv_total"], 5)

    def test_mv_rows(self):
        # Test missing values for each row
        expected_results = [1, 2, 1, 1]
        for i, result in enumerate(expected_results):
            self.assertAlmostEqual(_missing_vals(self.data_mv_df)["mv_rows"][i], result)

    def test_mv_cols(self):
        # Test missing values for each column
        expected_results = [1, 1, 1, 2]
        for i, result in enumerate(expected_results):
            self.assertAlmostEqual(_missing_vals(self.data_mv_df)["mv_cols"][i], result)

    def test_mv_rows_ratio(self):
        # Test missing values ratio for each row
        expected_results = [0.25, 0.5, 0.25, 0.25]
        for i, result in enumerate(expected_results):
            self.assertAlmostEqual(
                _missing_vals(self.data_mv_df)["mv_rows_ratio"][i], result
            )

        # Test if missing value ratio is between 0 and 1
        for i, _ in enumerate(self.data_mv_df):
            self.assertTrue(
                0 <= _missing_vals(self.data_mv_df)["mv_rows_ratio"][i] <= 1
            )

    def test_mv_cols_ratio(self):
        # Test missing values ratio for each column
        expected_results = [1 / 4, 0.25, 0.25, 0.5]
        for i, result in enumerate(expected_results):
            self.assertAlmostEqual(
                _missing_vals(self.data_mv_df)["mv_cols_ratio"][i], result
            )

        # Test if missing value ratio is between 0 and 1
        for i, _ in enumerate(self.data_mv_df):
            self.assertTrue(
                0 <= _missing_vals(self.data_mv_df)["mv_cols_ratio"][i] <= 1
            )


class Test__validate_input(unittest.TestCase):
    def test__validate_input_bool(self):
        # Raises an exception if the input is not boolean
        with self.assertRaises(TypeError):
            _validate_input_bool("True", None)
        with self.assertRaises(TypeError):
            _validate_input_bool(None, None)
        with self.assertRaises(TypeError):
            _validate_input_bool(1, None)

    def test__validate_input_int(self):
        # Raises an exception if the input is not an integer
        with self.assertRaises(TypeError):
            _validate_input_int(1.1, None)
        with self.assertRaises(TypeError):
            _validate_input_int([1], None)
        with self.assertRaises(TypeError):
            _validate_input_int("1", None)

    def test__validate_input_smaller(self):
        # Raises an exception if the first value is larger than the second
        with self.assertRaises(ValueError):
            _validate_input_smaller(0.3, 0.2, None)
        with self.assertRaises(ValueError):
            _validate_input_smaller(3, 2, None)
        with self.assertRaises(ValueError):
            _validate_input_smaller(5, -3, None)

    def test__validate_input_range(self):
        # Raises an exception if the value is outside the range or not numeric
        with self.assertRaises(ValueError):
            _validate_input_range(-0.1, "value -0.1", 0, 1)

        with self.assertRaises(ValueError):
            _validate_input_range(1.1, "value 1.1", 0, 1)

        with self.assertRaises(TypeError):
            _validate_input_range("1", "value string", 0, 1)

    def test__validate_input_sum_smaller(self):
        # Raises an exception if the values sum to more than the given limit
        with self.assertRaises(ValueError):
            _validate_input_sum_smaller(1, "Test Sum <= 1", 1.01)
        with self.assertRaises(ValueError):
            _validate_input_sum_smaller(1, "Test Sum <= 1", 0.3, 0.2, 0.4, 0.5)
        with self.assertRaises(ValueError):
            _validate_input_sum_smaller(-1, "Test Sum <= -1", -0.2, -0.7)
        with self.assertRaises(ValueError):
            _validate_input_sum_smaller(10, "Test Sum <= 10", 20, -11, 2)

    def test__validate_input_sum_larger(self):
        # Raises an exception if the values sum to less than the given limit
        with self.assertRaises(ValueError):
            _validate_input_sum_larger(1, "Test Sum >= 1", 0.99)
        with self.assertRaises(ValueError):
            _validate_input_sum_larger(1, "Test Sum >= 1", 0.9, 0.05)
        with self.assertRaises(ValueError):
            _validate_input_sum_larger(-2, "Test Sum >=-2", -3)
        with self.assertRaises(ValueError):
            _validate_input_sum_larger(7, "Test Sum >= 7", 1, 2, 3)
259