GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Branch master (5deb01)
by Andreas
02:32
created

klib.tests.test_util   A

Complexity

Total Complexity 39

Size/Duplication

Total Lines 171
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 128
dl 0
loc 171
rs 9.28
c 0
b 0
f 0
wmc 39

16 Methods

Rating   Name   Duplication   Size   Complexity  
A Test__corr_selector.test__corr_selector_matrix() 0 7 1
A Test__drop_duplicates.setUpClass() 0 9 1
A Test__corr_selector.test__corr_selector_label() 0 11 1
A Test__drop_duplicates.test__drop_dupl() 0 7 1
A Test__corr_selector.setUpClass() 0 10 1
A Test__missing_vals.test_mv_total() 0 5 1
A Test__missing_vals.test_mv_cols_ratio() 0 9 3
A Test__missing_vals.test_mv_cols() 0 5 2
A Test__missing_vals.setUpClass() 0 10 1
A Test__missing_vals.test_mv_rows() 0 5 2
A Test__missing_vals.test_mv_rows_ratio() 0 9 3
A Test__validate_input.test__validate_input_range() 0 9 4
A Test__validate_input.test__validate_input_int() 0 10 5
A Test__validate_input.test__validate_input_sum() 0 9 5
A Test__validate_input.test__validate_input_bool() 0 8 4
A Test__validate_input.test__validate_input_smaller() 0 8 4
1
import numpy as np
2
import pandas as pd
3
import unittest
4
from ..utils import (_corr_selector,
5
                     _drop_duplicates,
6
                     _missing_vals,
7
                     _validate_input_bool,
8
                     _validate_input_int,
9
                     _validate_input_range,
10
                     _validate_input_smaller,
11
                     _validate_input_sum)
12
13
14
class Test__corr_selector(unittest.TestCase):
    """Tests for ``_corr_selector`` applied to a correlation matrix and a correlation Series."""

    @classmethod
    def setUpClass(cls):
        """Build the shared fixtures: a 6x6 integer DataFrame and a 6-element target Series."""
        rows = [
            [1, 7, 2, 2, 4, 7],
            [3, 8, 3, 3, 7, 1],
            [5, 7, 9, 5, 1, 4],
            [1, 7, 8, 6, 1, 8],
            [1, 7, 5, 6, 2, 6],
            [2, 7, 3, 3, 5, 3],
        ]
        cls.df_data_corr = pd.DataFrame(rows)
        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        """Check the shape and the number of missing entries for each split on a correlation matrix."""

        def select(**options):
            # A fresh correlation matrix is computed for every call, so no state is shared
            # between the individual assertions.
            return _corr_selector(self.df_data_corr.corr(), **options)

        self.assertEqual(select().shape, (6, 6))

        # (keyword arguments, expected total count of missing entries in the result)
        cases = [
            ({'split': 'pos'}, 18),
            ({'split': 'pos', 'threshold': 0.5}, 26),
            ({'split': 'neg', 'threshold': -0.75}, 32),
            ({'split': 'above', 'threshold': 0.15}, 4),
            ({'split': 'below', 'threshold': 0.85}, 6),
        ]
        for options, expected_missing in cases:
            self.assertEqual(select(**options).isna().sum().sum(), expected_missing)

    def test__corr_selector_label(self):
        """Check the shape and the number of missing entries for each split on correlations with the target."""

        def select(**options):
            # The correlation with the target is recomputed for every call.
            return _corr_selector(self.df_data_corr.corrwith(self.target), **options)

        self.assertEqual(select().shape, (6, ))

        # (keyword arguments, expected count of missing entries in the result)
        cases = [
            ({'split': 'pos'}, 3),
            ({'split': 'pos', 'threshold': 0.8}, 4),
            ({'split': 'neg', 'threshold': -0.7}, 5),
            ({'split': 'above', 'threshold': 0.2}, 1),
            ({'split': 'below', 'threshold': 0.8}, 2),
        ]
        for options, expected_missing in cases:
            self.assertEqual(select(**options).isna().sum(), expected_missing)
46
47
48
class Test__drop_duplicates(unittest.TestCase):
    """Tests for ``_drop_duplicates`` on a DataFrame containing repeated and all-missing rows."""

    @classmethod
    def setUpClass(cls):
        """Build a 7x4 DataFrame in which three rows repeat an earlier row."""
        cls.data_dupl_df = pd.DataFrame([[pd.NA, pd.NA, pd.NA, pd.NA],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [2, 3, 4, 5],
                                         [1, 2, 3, pd.NA],
                                         [pd.NA, pd.NA, pd.NA, pd.NA]])

    def test__drop_dupl(self):
        """Check the deduplicated frame and the reported duplicates.

        The result of ``_drop_duplicates`` is indexed with ``[0]`` for the
        resulting DataFrame and ``[1]`` for the collection of duplicates.
        """
        result = _drop_duplicates(self.data_dupl_df)
        deduplicated, duplicates = result[0], result[1]

        # Test dropping of duplicate rows.
        # Shapes are tuples, so they are compared with assertEqual; assertAlmostEqual
        # would raise a TypeError (tuple subtraction) instead of a clean failure on mismatch.
        self.assertEqual(deduplicated.shape, (4, 4))
        # Test if the resulting DataFrame is equal to using the pandas method
        self.assertTrue(deduplicated.equals(self.data_dupl_df.drop_duplicates()))
        # Test number of duplicates
        self.assertEqual(len(duplicates), 3)
67
68
69
class Test__missing_vals(unittest.TestCase):
    """Tests for the missing-value summary returned by ``_missing_vals``.

    The tests read the keys ``'mv_total'``, ``'mv_rows'``, ``'mv_cols'``,
    ``'mv_rows_ratio'`` and ``'mv_cols_ratio'`` from the returned mapping.
    """

    @classmethod
    def setUpClass(cls):
        """Build the same 4x4 data with five missing entries as a list, a DataFrame and an array."""
        cls.data_mv_list = [[1, np.nan, 3, 4],
                            [None, 4, 5, None],
                            ['a', 'b', pd.NA, 'd'],
                            [True, False, 7, pd.NaT]]

        cls.data_mv_df = pd.DataFrame(cls.data_mv_list)

        cls.data_mv_array = np.array(cls.data_mv_list)

    def test_mv_total(self):
        """The total count must be 5 regardless of the input container type."""
        # Test total missing values
        for data in (self.data_mv_df, self.data_mv_array, self.data_mv_list):
            self.assertAlmostEqual(_missing_vals(data)['mv_total'], 5)

    def test_mv_rows(self):
        """Check the missing-value count of every row."""
        # Test missing values for each row
        expected_results = [1, 2, 1, 1]
        mv_rows = _missing_vals(self.data_mv_df)['mv_rows']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_rows[i], expected)

    def test_mv_cols(self):
        """Check the missing-value count of every column."""
        # Test missing values for each column
        expected_results = [1, 1, 1, 2]
        mv_cols = _missing_vals(self.data_mv_df)['mv_cols']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_cols[i], expected)

    def test_mv_rows_ratio(self):
        """Check the missing-value ratio of every row and that each ratio lies in [0, 1]."""
        # Test missing values ratio for each row
        expected_results = [0.25, 0.5, 0.25, 0.25]
        mv_rows_ratio = _missing_vals(self.data_mv_df)['mv_rows_ratio']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_rows_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1
        n_rows = self.data_mv_df.shape[0]
        for i in range(n_rows):
            self.assertTrue(0 <= mv_rows_ratio[i] <= 1)

    def test_mv_cols_ratio(self):
        """Check the missing-value ratio of every column and that each ratio lies in [0, 1]."""
        # Test missing values ratio for each column
        expected_results = [1/4, 0.25, 0.25, 0.5]
        mv_cols_ratio = _missing_vals(self.data_mv_df)['mv_cols_ratio']
        for i, expected in enumerate(expected_results):
            self.assertAlmostEqual(mv_cols_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1.
        # Iterate over the number of columns (not rows), so the check stays
        # correct if the fixture ever becomes non-square.
        n_cols = self.data_mv_df.shape[1]
        for i in range(n_cols):
            self.assertTrue(0 <= mv_cols_ratio[i] <= 1)
119
120
121
class Test__validate_input(unittest.TestCase):
    """Tests that each ``_validate_input_*`` helper raises the expected exception on invalid input."""

    def test__validate_input_bool(self):
        """Non-boolean values must be rejected with a TypeError."""
        # Raises an exception if the input is not boolean
        for not_a_bool in ('True', None, 1):
            with self.assertRaises(TypeError):
                _validate_input_bool(not_a_bool, None)

    def test__validate_input_int(self):
        """Non-integer values (including ``True``) must be rejected with a TypeError."""
        # Raises an exception if the input is not an integer
        for not_an_int in (1.1, True, [1], '1'):
            with self.assertRaises(TypeError):
                _validate_input_int(not_an_int, None)

    def test__validate_input_smaller(self):
        """A first value larger than the second must be rejected with a ValueError."""
        # Raises an exception if the first value is larger than the second
        for first, second in ((0.3, 0.2), (3, 2), (5, -3)):
            with self.assertRaises(ValueError):
                _validate_input_smaller(first, second, None)

    def test__validate_input_range(self):
        """Values outside the given bounds raise ValueError; a string value raises TypeError."""
        # (expected exception, positional arguments passed to the validator)
        cases = (
            (ValueError, (-0.1, 'value -0.1', 0, 1)),
            (ValueError, (1.1, 'value 1.1', 0, 1)),
            (TypeError, ('1', 'value string', 0, 1)),
        )
        for expected_error, arguments in cases:
            with self.assertRaises(expected_error):
                _validate_input_range(*arguments)

    def test__validate_input_sum(self):
        """Each of these argument combinations must be rejected with a ValueError."""
        argument_sets = (
            (1, 'Test Sum <= 1', 1.1),
            (1, 'Test Sum <= 1', 0.3, 0.2, 0.4, 0.5),
            (-1, 'Test Sum <= -1', -0.2, -0.7),
            (10, 'Test Sum <= 10', 20, -11, 2),
        )
        for arguments in argument_sets:
            with self.assertRaises(ValueError):
                _validate_input_sum(*arguments)
171