GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( 5a4fe9...c92c0e )
by Andreas
01:16
created

Test__drop_duplicates.test__drop_dupl()   A

Complexity

Conditions 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 4
nop 1
dl 0
loc 7
rs 10
c 0
b 0
f 0
1
import numpy as np
2
import pandas as pd
3
import unittest
4
from ..utils import (
5
    _corr_selector,
6
    _drop_duplicates,
7
    _missing_vals,
8
    _validate_input_bool,
9
    _validate_input_int,
10
    _validate_input_range,
11
    _validate_input_smaller,
12
    _validate_input_sum_smaller,
13
    _validate_input_sum_larger,
14
)
15
16
17
class Test__corr_selector(unittest.TestCase):
    """Checks ``_corr_selector`` on a full correlation matrix and on correlations with a target."""

    @classmethod
    def setUpClass(cls):
        # Shared 6x6 integer fixture; the second column holds 7 in every row but one.
        fixture_rows = [
            [1, 7, 2, 2, 4, 7],
            [3, 8, 3, 3, 7, 1],
            [5, 7, 9, 5, 1, 4],
            [1, 7, 8, 6, 1, 8],
            [1, 7, 5, 6, 2, 6],
            [2, 7, 3, 3, 5, 3],
        ]
        cls.df_data_corr = pd.DataFrame(fixture_rows)

        # Label series with one entry per fixture row.
        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        def select(**options):
            # Build the correlation matrix anew for every call so each assertion gets a fresh input.
            return _corr_selector(self.df_data_corr.corr(), **options)

        def missing_cells(**options):
            # Total number of missing entries across the whole returned frame.
            return select(**options).isna().sum().sum()

        # Default arguments keep the square shape of the input matrix.
        self.assertEqual(select().shape, (6, 6))
        self.assertEqual(missing_cells(split="pos"), 18)

        # (split, threshold, expected number of missing cells)
        thresholded_cases = (
            ("pos", 0.5, 26),
            ("neg", -0.75, 32),
            ("high", 0.15, 4),
            ("low", 0.85, 6),
        )
        for split_mode, limit, expected_missing in thresholded_cases:
            self.assertEqual(missing_cells(split=split_mode, threshold=limit), expected_missing)

    def test__corr_selector_label(self):
        def select(**options):
            # Correlate every column with the target anew for every call.
            return _corr_selector(self.df_data_corr.corrwith(self.target), **options)

        def missing_entries(**options):
            # Number of missing entries in the returned one-dimensional result.
            return select(**options).isna().sum()

        # Default arguments keep one entry per fixture column.
        self.assertEqual(select().shape, (6,))
        self.assertEqual(missing_entries(split="pos"), 3)

        # (split, threshold, expected number of missing entries)
        thresholded_cases = (
            ("pos", 0.8, 4),
            ("neg", -0.7, 5),
            ("high", 0.2, 1),
            ("low", 0.8, 2),
        )
        for split_mode, limit, expected_missing in thresholded_cases:
            self.assertEqual(missing_entries(split=split_mode, threshold=limit), expected_missing)
68
69
70
class Test__drop_duplicates(unittest.TestCase):
    """Checks ``_drop_duplicates`` against a frame with repeated rows, including all-NA rows."""

    @classmethod
    def setUpClass(cls) -> None:
        # Seven rows, four of them distinct: rows 1-3 are identical and the
        # all-NA row appears twice (first and last), so three rows are repeats.
        cls.data_dupl_df = pd.DataFrame(
            [
                [pd.NA, pd.NA, pd.NA, pd.NA],
                [1, 2, 3, 4],
                [1, 2, 3, 4],
                [1, 2, 3, 4],
                [2, 3, 4, 5],
                [1, 2, 3, pd.NA],
                [pd.NA, pd.NA, pd.NA, pd.NA],
            ]
        )

    def test__drop_dupl(self):
        # Run the helper once; index 0 is the de-duplicated frame and
        # index 1 the collection of dropped rows, as used by the checks below.
        result = _drop_duplicates(self.data_dupl_df)
        deduplicated = result[0]
        dropped = result[1]

        # Test dropping of duplicate rows. Shapes are tuples, so compare them
        # exactly: assertAlmostEqual would raise TypeError on a mismatch.
        self.assertEqual(deduplicated.shape, (4, 4))
        # Test if the resulting DataFrame is equal to using the pandas method
        self.assertTrue(deduplicated.equals(self.data_dupl_df.drop_duplicates()))
        # Test number of duplicates
        self.assertEqual(len(dropped), 3)
92
93
94
class Test__missing_vals(unittest.TestCase):
    """Checks the entries of the mapping returned by ``_missing_vals`` on a 4x4 mixed-type fixture."""

    @classmethod
    def setUpClass(cls) -> None:
        # One fixture in three container types; it holds five missing markers
        # in total (np.nan, None twice, pd.NA and pd.NaT).
        cls.data_mv_list = [
            [1, np.nan, 3, 4],
            [None, 4, 5, None],
            ["a", "b", pd.NA, "d"],
            [True, False, 7, pd.NaT],
        ]

        cls.data_mv_df = pd.DataFrame(cls.data_mv_list)

        cls.data_mv_array = np.array(cls.data_mv_list)

    def test_mv_total(self):
        # Test total missing values; the count is an integer, so compare exactly.
        for data in (self.data_mv_df, self.data_mv_array, self.data_mv_list):
            self.assertEqual(_missing_vals(data)["mv_total"], 5)

    def test_mv_rows(self):
        # Test missing values for each row
        expected_results = [1, 2, 1, 1]
        mv_rows = _missing_vals(self.data_mv_df)["mv_rows"]  # computed once, not per iteration
        for i, expected in enumerate(expected_results):
            self.assertEqual(mv_rows[i], expected)

    def test_mv_cols(self):
        # Test missing values for each column
        expected_results = [1, 1, 1, 2]
        mv_cols = _missing_vals(self.data_mv_df)["mv_cols"]  # computed once, not per iteration
        for i, expected in enumerate(expected_results):
            self.assertEqual(mv_cols[i], expected)

    def test_mv_rows_ratio(self):
        # Test missing values ratio for each row
        expected_results = [0.25, 0.5, 0.25, 0.25]
        mv_rows_ratio = _missing_vals(self.data_mv_df)["mv_rows_ratio"]
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_rows_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1
        n_rows = self.data_mv_df.shape[0]
        for i in range(n_rows):
            self.assertTrue(0 <= mv_rows_ratio[i] <= 1)

    def test_mv_cols_ratio(self):
        # Test missing values ratio for each column
        expected_results = [1 / 4, 0.25, 0.25, 0.5]
        mv_cols_ratio = _missing_vals(self.data_mv_df)["mv_cols_ratio"]
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_cols_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1. Iterate over the
        # column count, not the row count, since the result is per column.
        n_cols = self.data_mv_df.shape[1]
        for i in range(n_cols):
            self.assertTrue(0 <= mv_cols_ratio[i] <= 1)
145
146
147
class Test__validate_input(unittest.TestCase):
    """Checks that the ``_validate_input_*`` helpers raise the expected exception for bad input."""

    def test__validate_input_bool(self):
        # Every non-boolean value must be rejected with a TypeError
        for not_a_bool in ("True", None, 1):
            with self.assertRaises(TypeError):
                _validate_input_bool(not_a_bool, None)

    def test__validate_input_int(self):
        # Every non-integer value must be rejected with a TypeError
        for not_an_int in (1.1, [1], "1"):
            with self.assertRaises(TypeError):
                _validate_input_int(not_an_int, None)

    def test__validate_input_smaller(self):
        # A first value larger than the second must be rejected with a ValueError
        for larger, smaller in ((0.3, 0.2), (3, 2), (5, -3)):
            with self.assertRaises(ValueError):
                _validate_input_smaller(larger, smaller, None)

    def test__validate_input_range(self):
        # (value, label, expected exception), each checked against the bounds 0 and 1
        rejected_cases = (
            (-0.1, "value -0.1", ValueError),
            (1.1, "value 1.1", ValueError),
            ("1", "value string", TypeError),
        )
        for value, label, expected_error in rejected_cases:
            with self.assertRaises(expected_error):
                _validate_input_range(value, label, 0, 1)

    def test__validate_input_sum_smaller(self):
        # Each argument tuple is expected to be rejected with a ValueError
        rejected_calls = (
            (1, "Test Sum <= 1", 1.01),
            (1, "Test Sum <= 1", 0.3, 0.2, 0.4, 0.5),
            (-1, "Test Sum <= -1", -0.2, -0.7),
            (10, "Test Sum <= 10", 20, -11, 2),
        )
        for call_args in rejected_calls:
            with self.assertRaises(ValueError):
                _validate_input_sum_smaller(*call_args)

    def test__validate_input_sum_larger(self):
        # Each argument tuple is expected to be rejected with a ValueError
        rejected_calls = (
            (1, "Test Sum >= 1", 0.99),
            (1, "Test Sum >= 1", 0.9, 0.05),
            (-2, "Test Sum >=-2", -3),
            (7, "Test Sum >= 7", 1, 2, 3),
        )
        for call_args in rejected_calls:
            with self.assertRaises(ValueError):
                _validate_input_sum_larger(*call_args)
204