GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( 5df733...c3852e )
by Andreas
01:29
created

klib.tests.test_describe   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 98
Duplicated Lines 24.49 %

Importance

Changes 0
Metric Value
eloc 70
dl 24
loc 98
rs 10
c 0
b 0
f 0
wmc 9

9 Methods

Rating   Name   Duplication   Size   Complexity  
A Test__missing_vals.setUpClass() 0 17 1
A Test_corr_mat.test_output_type() 0 4 1
A Test__missing_vals.test_mv_cols() 0 6 1
A Test__missing_vals.test_mv_cols_ratio() 12 12 1
A Test_corr_mat.test_output_shape() 0 4 1
A Test__missing_vals.test_mv_total() 0 5 1
A Test__missing_vals.test_mv_rows() 0 6 1
A Test_corr_mat.setUpClass() 0 9 1
A Test__missing_vals.test_mv_rows_ratio() 12 12 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
import numpy as np
2
import pandas as pd
3
import unittest
4
from klib.describe import _missing_vals, corr_mat
5
6
# Script entry point: run the tests in this module when it is executed directly.
# NOTE(review): this guard sits above the TestCase classes, so on direct
# execution unittest.main() is called before any test class has been defined;
# presumably no tests are collected in that case. Consider moving the guard to
# the end of the module.
if __name__ == '__main__':
7
    unittest.main()
8
9
10
class Test__missing_vals(unittest.TestCase):
11
12
    @classmethod
13
    def setUpClass(cls):
14
        cls.data_mv_df = pd.DataFrame([[1, np.nan, 3, 4],
15
                                       [None, 4, 5, None],
16
                                       ['a', 'b', pd.NA, 'd'],
17
                                       [True, False, 7, pd.NaT]],
18
                                      columns=['Col1', 'Col2', 'Col3', 'Col4'])
19
20
        cls.data_mv_array = np.array([[1, np.nan, 3, 4],
21
                                      [None, 4, 5, None],
22
                                      ['a', 'b', pd.NA, 'd'],
23
                                      [True, False, 7, pd.NaT]])
24
25
        cls.data_mv_list = [[1, np.nan, 3, 4],
26
                            [None, 4, 5, None],
27
                            ['a', 'b', pd.NA, 'd'],
28
                            [True, False, 7, pd.NaT]]
29
30
    def test_mv_total(self):
31
        # Test total missing values
32
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_total'], 5)
33
        self.assertAlmostEqual(_missing_vals(self.data_mv_array)['mv_total'], 5)
34
        self.assertAlmostEqual(_missing_vals(self.data_mv_list)['mv_total'], 5)
35
36
    def test_mv_rows(self):
37
        # Test missing values for each row
38
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows'][0], 1)
39
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows'][1], 2)
40
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows'][2], 1)
41
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows'][3], 1)
42
43
    def test_mv_cols(self):
44
        # Test missing values for each column
45
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols'][0], 1)
46
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols'][1], 1)
47
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols'][2], 1)
48
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols'][3], 2)
49
50 View Code Duplication
    def test_mv_rows_ratio(self):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
51
        # Test missing values ratio for each row
52
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows_ratio'][0], 0.25)
53
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows_ratio'][1], 0.5)
54
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows_ratio'][2], 0.25)
55
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows_ratio'][3], 0.25)
56
57
        # Test if missing value ratio is between 0 and 1
58
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_rows_ratio'][0] <= 1)
59
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_rows_ratio'][1] <= 1)
60
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_rows_ratio'][2] <= 1)
61
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_rows_ratio'][3] <= 1)
62
63 View Code Duplication
    def test_mv_cols_ratio(self):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
64
        # Test missing values ratio for each row
65
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols_ratio'][0], 0.25)
66
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols_ratio'][1], 0.25)
67
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols_ratio'][2], 0.25)
68
        self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols_ratio'][3], 0.5)
69
70
        # Test if missing value ratio is between 0 and 1
71
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_cols_ratio'][0] <= 1)
72
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_cols_ratio'][1] <= 1)
73
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_cols_ratio'][2] <= 1)
74
        self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_cols_ratio'][3] <= 1)
75
76
77
class Test_corr_mat(unittest.TestCase):
78
79
    @classmethod
80
    def setUpClass(cls):
81
        cls.data_corr = pd.DataFrame([[1, 0, 3j, 4],
82
                                      [3, 4, 5, 6],
83
                                      ['a', 'b', pd.NA, 'd'],
84
                                      [5, False, np.nan, pd.NaT]],
85
                                     columns=['Col1', 'Col2', 'Col3', 'Col4'])
86
87
        cls.data_corr_list = [1, 2, -3, 4j, 5, 0]
88
89
    def test_output_type(self):
90
        # Test conversion from pd.io.formats.style.Styler to pd.core.frame.DataFrame
91
        self.assertTrue(type(corr_mat(self.data_corr)), type(pd.DataFrame))
92
        self.assertTrue(type(corr_mat(self.data_corr_list)), type(pd.DataFrame))
93
94
    def test_output_shape(self):
95
        # Test for output of equal dimensions
96
        self.assertEqual(corr_mat(self.data_corr).data.shape[0], corr_mat(self.data_corr).data.shape[1])
97
        self.assertEqual(corr_mat(self.data_corr_list).data.shape[0], corr_mat(self.data_corr_list).data.shape[1])
98