GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( b8c70d...3ac3d9 )
by Andreas
01:13
created

Test__missing_vals.test_mv_cols()   A

Complexity

Conditions 2

Size

Total Lines 5
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 4
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
import numpy as np
2
import pandas as pd
3
import unittest
4
from klib.utils import _corr_selector
5
from klib.utils import _drop_duplicates
6
from klib.utils import _missing_vals
7
from klib.utils import _validate_input_0_1
8
from klib.utils import _validate_input_bool
9
10
# NOTE(review): this guard sits above the TestCase classes defined further
# down in this module. When the file is executed as a script,
# unittest.main() collects tests from the module as it exists at this point
# and then exits, so none of the classes below are picked up. The guard
# belongs at the very end of the module.
if __name__ == '__main__':
    unittest.main()
12
13
14
class Test__corr_selector(unittest.TestCase):
    """Tests for ``_corr_selector`` on a correlation matrix and on label correlations."""

    @classmethod
    def setUpClass(cls):
        """Create the 6x6 feature frame and the 6-element target used by every test."""
        cls.df_data_corr = pd.DataFrame([[1, 7, 2, 2, 4, 7],
                                         [3, 8, 3, 3, 7, 1],
                                         [5, 7, 9, 5, 1, 4],
                                         [1, 7, 8, 6, 1, 8],
                                         [1, 7, 5, 6, 2, 6],
                                         [2, 7, 3, 3, 5, 3]])

        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        """Check the result shape and the NaN count per split for ``DataFrame.corr()``."""
        self.assertEqual(_corr_selector(self.df_data_corr.corr()).shape, (6, 6))

        # (keyword arguments passed to _corr_selector, expected number of NaN cells)
        cases = [
            ({'split': 'pos'}, 18),
            ({'split': 'pos', 'threshold': 0.5}, 26),
            ({'split': 'neg', 'threshold': -0.75}, 32),
            ({'split': 'high', 'threshold': 0.15}, 4),
            ({'split': 'low', 'threshold': 0.85}, 6),
        ]
        for kwargs, expected_nans in cases:
            with self.subTest(**kwargs):
                # A fresh correlation matrix is computed per case so that no
                # case depends on what a previous call did with its input.
                result = _corr_selector(self.df_data_corr.corr(), **kwargs)
                self.assertEqual(result.isna().sum().sum(), expected_nans)

    def test__corr_selector_label(self):
        """Check the result shape and the NaN count per split for ``DataFrame.corrwith(target)``."""
        self.assertEqual(_corr_selector(self.df_data_corr.corrwith(self.target)).shape, (6, ))

        # (keyword arguments passed to _corr_selector, expected number of NaN entries)
        cases = [
            ({'split': 'pos'}, 3),
            ({'split': 'pos', 'threshold': 0.8}, 4),
            ({'split': 'neg', 'threshold': -0.7}, 5),
            ({'split': 'high', 'threshold': 0.2}, 1),
            ({'split': 'low', 'threshold': 0.8}, 2),
        ]
        for kwargs, expected_nans in cases:
            with self.subTest(**kwargs):
                # Recomputed per case for the same reason as in the matrix test.
                result = _corr_selector(self.df_data_corr.corrwith(self.target), **kwargs)
                self.assertEqual(result.isna().sum(), expected_nans)
46
47
48
class Test__drop_duplicates(unittest.TestCase):
    """Tests for ``_drop_duplicates``."""

    @classmethod
    def setUpClass(cls):
        """Create a 7x4 frame containing repeated rows, including two all-NA rows."""
        cls.data_dupl_df = pd.DataFrame([[pd.NA, pd.NA, pd.NA, pd.NA],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [2, 3, 4, 5],
                                         [1, 2, 3, pd.NA],
                                         [pd.NA, pd.NA, pd.NA, pd.NA]])

    def test__drop_dupl(self):
        """Check the deduplicated frame (element 0) and the reported duplicates (element 1)."""
        result = _drop_duplicates(self.data_dupl_df)

        # Test dropping of duplicate rows.
        # assertEqual is used because shapes are tuples: assertAlmostEqual would
        # raise a TypeError (tuple subtraction) instead of a regular test
        # failure if the shapes ever differed.
        self.assertEqual(result[0].shape, (4, 4))
        # Test if the resulting DataFrame is equal to using the pandas method
        self.assertTrue(result[0].equals(self.data_dupl_df.drop_duplicates()))
        # Test number of duplicates
        self.assertEqual(len(result[1]), 3)
67
68
69
class Test__missing_vals(unittest.TestCase):
    """Tests for the entries of the mapping returned by ``_missing_vals``."""

    @classmethod
    def setUpClass(cls):
        """Create the same 4x4 data with missing values as a DataFrame, an array and a list."""
        cls.data_mv_df = pd.DataFrame([[1, np.nan, 3, 4],
                                       [None, 4, 5, None],
                                       ['a', 'b', pd.NA, 'd'],
                                       [True, False, 7, pd.NaT]])

        cls.data_mv_array = np.array([[1, np.nan, 3, 4],
                                      [None, 4, 5, None],
                                      ['a', 'b', pd.NA, 'd'],
                                      [True, False, 7, pd.NaT]])

        cls.data_mv_list = [[1, np.nan, 3, 4],
                            [None, 4, 5, None],
                            ['a', 'b', pd.NA, 'd'],
                            [True, False, 7, pd.NaT]]

    def test_mv_total(self):
        """The total count of missing values is 5 for every supported input type."""
        # Test total missing values
        self.assertEqual(_missing_vals(self.data_mv_df)['mv_total'], 5)
        self.assertEqual(_missing_vals(self.data_mv_array)['mv_total'], 5)
        self.assertEqual(_missing_vals(self.data_mv_list)['mv_total'], 5)

    def test_mv_rows(self):
        """Each row reports its own number of missing values."""
        # Test missing values for each row
        expected_results = [1, 2, 1, 1]
        mv_rows = _missing_vals(self.data_mv_df)['mv_rows']
        for i, expected in enumerate(expected_results):
            self.assertEqual(mv_rows[i], expected)

    def test_mv_cols(self):
        """Each column reports its own number of missing values."""
        # Test missing values for each column
        expected_results = [1, 1, 1, 2]
        mv_cols = _missing_vals(self.data_mv_df)['mv_cols']
        for i, expected in enumerate(expected_results):
            self.assertEqual(mv_cols[i], expected)

    def test_mv_rows_ratio(self):
        """Each row reports the fraction of its values that are missing."""
        # Test missing values ratio for each row
        expected_results = [0.25, 0.5, 0.25, 0.25]
        mv_rows_ratio = _missing_vals(self.data_mv_df)['mv_rows_ratio']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_rows_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1
        for i in range(len(self.data_mv_df)):
            self.assertTrue(0 <= mv_rows_ratio[i] <= 1)

    def test_mv_cols_ratio(self):
        """Each column reports the fraction of its values that are missing."""
        # Test missing values ratio for each column
        expected_results = [0.25, 0.25, 0.25, 0.5]
        mv_cols_ratio = _missing_vals(self.data_mv_df)['mv_cols_ratio']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_cols_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1.
        # Iterate over the number of columns (shape[1]); the row count only
        # matches it here because the fixture happens to be square.
        for i in range(self.data_mv_df.shape[1]):
            self.assertTrue(0 <= mv_cols_ratio[i] <= 1)
125
126
127
class Test__validate_input(unittest.TestCase):
    """Tests for the ``_validate_input_0_1`` and ``_validate_input_bool`` helpers."""

    def test__validate_input_0_1(self):
        """A value below 0 (-0.1) and a value above 1 (1.1) must each raise ``ValueError``."""
        for value, name in ((-0.1, '-0.1'), (1.1, '1.1')):
            with self.subTest(value=value):
                with self.assertRaises(ValueError):
                    _validate_input_0_1(value, name)

    def test__validate_input_bool(self):
        """Non-boolean inputs (a string, ``None`` and an int) must each raise ``ValueError``."""
        # Raises an exception if the input is not boolean
        for value in ('True', None, 1):
            with self.subTest(value=value):
                with self.assertRaises(ValueError):
                    _validate_input_bool(value, None)
144