GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( 2625ff...cc4c68 )
by Andreas
01:13
created

Test__validate_input.test__validate_input_smaller()   A

Complexity

Conditions 4

Size

Total Lines 8
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 7
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
import numpy as np
2
import pandas as pd
3
import unittest
4
from klib.utils import _corr_selector
5
from klib.utils import _drop_duplicates
6
from klib.utils import _missing_vals
7
from klib.utils import _validate_input_bool
8
from klib.utils import _validate_input_int
9
from klib.utils import _validate_input_range
10
from klib.utils import _validate_input_smaller
11
12
13
# Script entry point: start the unittest runner when this file is executed directly.
# NOTE(review): this guard is placed before the TestCase classes defined further
# down in the file, so they presumably do not exist yet when unittest.main() runs
# in script mode -- consider moving the guard to the end of the file; confirm.
if __name__ == '__main__':
    unittest.main()
15
16
17
class Test__corr_selector(unittest.TestCase):
    """Exercise ``_corr_selector`` on a correlation matrix and on label correlations."""

    @classmethod
    def setUpClass(cls):
        # 6x6 numeric fixture shared by all tests in this class.
        rows = [
            [1, 7, 2, 2, 4, 7],
            [3, 8, 3, 3, 7, 1],
            [5, 7, 9, 5, 1, 4],
            [1, 7, 8, 6, 1, 8],
            [1, 7, 5, 6, 2, 6],
            [2, 7, 3, 3, 5, 3],
        ]
        cls.df_data_corr = pd.DataFrame(rows)

        # Target the features are correlated with in the label test.
        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        """The full matrix keeps its shape; each split/threshold masks the expected cell count."""
        unfiltered = _corr_selector(self.df_data_corr.corr())
        self.assertEqual(unfiltered.shape, (6, 6))

        # (keyword arguments, expected number of NaN cells in the result)
        nan_cases = (
            ({'split': 'pos'}, 18),
            ({'split': 'pos', 'threshold': 0.5}, 26),
            ({'split': 'neg', 'threshold': -0.75}, 32),
            ({'split': 'high', 'threshold': 0.15}, 4),
            ({'split': 'low', 'threshold': 0.85}, 6),
        )
        for options, expected_nans in nan_cases:
            # A fresh correlation matrix is computed for every call, as before.
            selected = _corr_selector(self.df_data_corr.corr(), **options)
            self.assertEqual(selected.isna().sum().sum(), expected_nans)

    def test__corr_selector_label(self):
        """Same checks for the feature-vs-target correlations returned by ``corrwith``."""
        unfiltered = _corr_selector(self.df_data_corr.corrwith(self.target))
        self.assertEqual(unfiltered.shape, (6, ))

        # (keyword arguments, expected number of NaN entries in the result)
        nan_cases = (
            ({'split': 'pos'}, 3),
            ({'split': 'pos', 'threshold': 0.8}, 4),
            ({'split': 'neg', 'threshold': -0.7}, 5),
            ({'split': 'high', 'threshold': 0.2}, 1),
            ({'split': 'low', 'threshold': 0.8}, 2),
        )
        for options, expected_nans in nan_cases:
            label_corr = self.df_data_corr.corrwith(self.target)
            selected = _corr_selector(label_corr, **options)
            self.assertEqual(selected.isna().sum(), expected_nans)
49
50
51
class Test__drop_duplicates(unittest.TestCase):
    """Tests for ``_drop_duplicates`` on a frame with repeated and all-NA rows."""

    @classmethod
    def setUpClass(cls):
        # 7x4 fixture: rows 1-3 are identical and the first and last rows are both all-NA.
        cls.data_dupl_df = pd.DataFrame([[pd.NA, pd.NA, pd.NA, pd.NA],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [2, 3, 4, 5],
                                         [1, 2, 3, pd.NA],
                                         [pd.NA, pd.NA, pd.NA, pd.NA]])

    def test__drop_dupl(self):
        """Check the de-duplicated frame and the reported duplicates."""
        deduplicated, duplicates = _drop_duplicates(self.data_dupl_df)

        # Test dropping of duplicate rows.
        # assertEqual (not assertAlmostEqual) because shapes are tuples: a mismatch
        # must produce a normal assertion failure, not a TypeError from tuple arithmetic.
        self.assertEqual(deduplicated.shape, (4, 4))

        # Test if the resulting DataFrame is equal to using the pandas method
        self.assertTrue(deduplicated.equals(self.data_dupl_df.drop_duplicates()))

        # Test number of duplicates
        self.assertEqual(len(duplicates), 3)
70
71
72
class Test__missing_vals(unittest.TestCase):
    """Tests for ``_missing_vals`` using list, DataFrame and ndarray inputs."""

    @classmethod
    def setUpClass(cls):
        # 4x4 fixture mixing several missing-value markers: np.nan, None, pd.NA, pd.NaT.
        cls.data_mv_list = [[1, np.nan, 3, 4],
                            [None, 4, 5, None],
                            ['a', 'b', pd.NA, 'd'],
                            [True, False, 7, pd.NaT]]

        cls.data_mv_df = pd.DataFrame(cls.data_mv_list)

        cls.data_mv_array = np.array(cls.data_mv_list)

    def test_mv_total(self):
        # Test total missing values for every supported input container
        for data in (self.data_mv_df, self.data_mv_array, self.data_mv_list):
            self.assertEqual(_missing_vals(data)['mv_total'], 5)

    def test_mv_rows(self):
        # Test missing values for each row
        expected_results = [1, 2, 1, 1]
        mv_rows = _missing_vals(self.data_mv_df)['mv_rows']
        for i, expected in enumerate(expected_results):
            self.assertEqual(mv_rows[i], expected)

    def test_mv_cols(self):
        # Test missing values for each column
        expected_results = [1, 1, 1, 2]
        mv_cols = _missing_vals(self.data_mv_df)['mv_cols']
        for i, expected in enumerate(expected_results):
            self.assertEqual(mv_cols[i], expected)

    def test_mv_rows_ratio(self):
        # Test missing values ratio for each row
        expected_results = [0.25, 0.5, 0.25, 0.25]
        rows_ratio = _missing_vals(self.data_mv_df)['mv_rows_ratio']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(rows_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1
        for i in range(len(self.data_mv_df)):
            self.assertTrue(0 <= rows_ratio[i] <= 1)

    def test_mv_cols_ratio(self):
        # Test missing values ratio for each column
        expected_results = [1/4, 0.25, 0.25, 0.5]
        cols_ratio = _missing_vals(self.data_mv_df)['mv_cols_ratio']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(cols_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1.
        # Iterate over the number of columns (shape[1]); using the row count here
        # would only be correct while the fixture happens to be square.
        for i in range(self.data_mv_df.shape[1]):
            self.assertTrue(0 <= cols_ratio[i] <= 1)
122
123
124
class Test__validate_input(unittest.TestCase):
    """Each ``_validate_input_*`` helper must reject invalid arguments with the expected error."""

    def test__validate_input_bool(self):
        # Raises an exception if the input is not boolean
        for not_a_bool in ('True', None, 1):
            with self.assertRaises(TypeError):
                _validate_input_bool(not_a_bool, None)

    def test__validate_input_int(self):
        # Raises an exception if the input is not an integer
        for not_an_int in (1.1, True, [1], '1'):
            with self.assertRaises(TypeError):
                _validate_input_int(not_an_int, None)

    def test__validate_input_smaller(self):
        # Raises an exception if the first value is larger than the second
        for first, second in ((0.3, 0.2), (3, 2), (5, -3)):
            with self.assertRaises(ValueError):
                _validate_input_smaller(first, second, None)

    def test__validate_input_range(self):
        # (expected exception, value, name) checked against the range 0..1
        invalid_cases = (
            (ValueError, -0.1, 'value -0.1'),
            (ValueError, 1.1, 'value 1.1'),
            (TypeError, '1', 'value string'),
        )
        for expected_error, value, name in invalid_cases:
            with self.assertRaises(expected_error):
                _validate_input_range(value, name, 0, 1)
164