klib.tests.test_util.Test__missing_vals.test_mv_cols() - Code Metrics - Inspection of "tests for _corr_selector" - akanz1/klib - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( b8c70d...3ac3d9 )

by Andreas

created 2020-04-16 18:40 UTC

Test__missing_vals.test_mv_cols() A

↳ Parent: klib.tests.test_util

Complexity

Conditions

Size

Total Lines	5
Code Lines	4

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	4
nop	1
dl	0
loc	5
rs	10
c	0
b	0
f	0

import numpy as np
import pandas as pd
import unittest
from klib.utils import _corr_selector
from klib.utils import _drop_duplicates
from klib.utils import _missing_vals
from klib.utils import _validate_input_0_1
from klib.utils import _validate_input_bool

# Script entry point: hand control to the unittest command-line runner.
# NOTE(review): this guard sits above the TestCase classes. When the file is
# executed directly, unittest.main() is called before those classes have been
# defined, so it presumably finds no tests and exits — consider moving this
# guard to the very end of the file. Running via a test runner that imports
# the module is unaffected.
if __name__ == '__main__':
    unittest.main()


class Test__corr_selector(unittest.TestCase):
    """Exercise ``_corr_selector`` on a correlation matrix and on label correlations."""

    @classmethod
    def setUpClass(cls):
        # Six observations of six numeric features, shared by all tests below.
        feature_rows = [
            [1, 7, 2, 2, 4, 7],
            [3, 8, 3, 3, 7, 1],
            [5, 7, 9, 5, 1, 4],
            [1, 7, 8, 6, 1, 8],
            [1, 7, 5, 6, 2, 6],
            [2, 7, 3, 3, 5, 3],
        ]
        cls.df_data_corr = pd.DataFrame(feature_rows)

        # Label series correlated against every feature column.
        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        """Check the result shape and the NaN count for each split on a full matrix."""
        # Default arguments: the 6x6 shape of the correlation matrix is kept.
        self.assertEqual(_corr_selector(self.df_data_corr.corr()).shape, (6, 6))

        # (keyword arguments, expected number of NaN cells in the result)
        matrix_cases = (
            ({'split': 'pos'}, 18),
            ({'split': 'pos', 'threshold': 0.5}, 26),
            ({'split': 'neg', 'threshold': -0.75}, 32),
            ({'split': 'high', 'threshold': 0.15}, 4),
            ({'split': 'low', 'threshold': 0.85}, 6),
        )
        for options, expected_nan_count in matrix_cases:
            # A fresh correlation matrix is computed for every case.
            selected = _corr_selector(self.df_data_corr.corr(), **options)
            self.assertEqual(selected.isna().sum().sum(), expected_nan_count)

    def test__corr_selector_label(self):
        """Check the result shape and the NaN count for each split on label correlations."""
        # Default arguments: one entry per feature column.
        self.assertEqual(_corr_selector(self.df_data_corr.corrwith(self.target)).shape, (6, ))

        # (keyword arguments, expected number of NaN entries in the result)
        label_cases = (
            ({'split': 'pos'}, 3),
            ({'split': 'pos', 'threshold': 0.8}, 4),
            ({'split': 'neg', 'threshold': -0.7}, 5),
            ({'split': 'high', 'threshold': 0.2}, 1),
            ({'split': 'low', 'threshold': 0.8}, 2),
        )
        for options, expected_nan_count in label_cases:
            # Correlations with the target are recomputed for every case.
            selected = _corr_selector(self.df_data_corr.corrwith(self.target), **options)
            self.assertEqual(selected.isna().sum(), expected_nan_count)


class Test__drop_duplicates(unittest.TestCase):
    """Tests for ``_drop_duplicates`` on a frame with repeated and all-NA rows."""

    @classmethod
    def setUpClass(cls):
        # Seven rows: [1, 2, 3, 4] appears three times and the all-NA row
        # appears twice, which the assertions below count as three duplicates.
        cls.data_dupl_df = pd.DataFrame([[pd.NA, pd.NA, pd.NA, pd.NA],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [2, 3, 4, 5],
                                         [1, 2, 3, pd.NA],
                                         [pd.NA, pd.NA, pd.NA, pd.NA]])

    def test__drop_dupl(self):
        """Check the deduplicated frame and the reported duplicates."""
        # Call the function once; element 0 is the deduplicated frame and
        # element 1 is the collection of duplicates that were found.
        result = _drop_duplicates(self.data_dupl_df)

        # Test dropping of duplicate rows.
        # assertEqual (not assertAlmostEqual) because shapes are tuples: on a
        # mismatch assertAlmostEqual would raise a TypeError from subtracting
        # tuples instead of reporting a readable assertion failure.
        self.assertEqual(result[0].shape, (4, 4))
        # Test if the resulting DataFrame is equal to using the pandas method
        self.assertTrue(result[0].equals(self.data_dupl_df.drop_duplicates()))
        # Test number of duplicates
        self.assertEqual(len(result[1]), 3)


class Test__missing_vals(unittest.TestCase):
    """Tests for ``_missing_vals`` on DataFrame, ndarray and nested-list input."""

    @classmethod
    def setUpClass(cls):
        # A 4x4 grid holding five missing markers in total:
        # np.nan, None (twice), pd.NA and pd.NaT.
        raw_rows = [[1, np.nan, 3, 4],
                    [None, 4, 5, None],
                    ['a', 'b', pd.NA, 'd'],
                    [True, False, 7, pd.NaT]]

        # The same data in each of the three container types under test.
        cls.data_mv_df = pd.DataFrame(raw_rows)
        cls.data_mv_array = np.array(raw_rows)
        cls.data_mv_list = raw_rows

    def test_mv_total(self):
        """The total count of missing values is 5 for every input type."""
        for data in (self.data_mv_df, self.data_mv_array, self.data_mv_list):
            self.assertEqual(_missing_vals(data)['mv_total'], 5)

    def test_mv_rows(self):
        """Each row reports its own number of missing values."""
        # Computed once; the result does not change between loop iterations.
        mv_rows = _missing_vals(self.data_mv_df)['mv_rows']
        for i, expected in enumerate([1, 2, 1, 1]):
            self.assertEqual(mv_rows[i], expected)

    def test_mv_cols(self):
        """Each column reports its own number of missing values."""
        mv_cols = _missing_vals(self.data_mv_df)['mv_cols']
        for i, expected in enumerate([1, 1, 1, 2]):
            self.assertEqual(mv_cols[i], expected)

    def test_mv_rows_ratio(self):
        """Per-row ratios match the expected values and lie within [0, 1]."""
        mv_rows_ratio = _missing_vals(self.data_mv_df)['mv_rows_ratio']

        # Test missing values ratio for each row
        for i, expected in enumerate([0.25, 0.5, 0.25, 0.25]):
            self.assertAlmostEqual(mv_rows_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1
        for i in range(self.data_mv_df.shape[0]):
            self.assertTrue(0 <= mv_rows_ratio[i] <= 1)

    def test_mv_cols_ratio(self):
        """Per-column ratios match the expected values and lie within [0, 1]."""
        mv_cols_ratio = _missing_vals(self.data_mv_df)['mv_cols_ratio']

        # Test missing values ratio for each column
        for i, expected in enumerate([0.25, 0.25, 0.25, 0.5]):
            self.assertAlmostEqual(mv_cols_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1.
        # Iterate over the number of columns (shape[1]), not the number of
        # rows, so the check stays correct for a non-square fixture.
        for i in range(self.data_mv_df.shape[1]):
            self.assertTrue(0 <= mv_cols_ratio[i] <= 1)


class Test__validate_input(unittest.TestCase):
    """Verify that the input validators raise ``ValueError`` for the inputs below."""

    def test__validate_input_0_1(self):
        """-0.1 and 1.1 must both be rejected with ``ValueError``."""
        # (value, second argument passed to the validator)
        rejected = ((-0.1, '-0.1'), (1.1, '1.1'))
        for value, label in rejected:
            self.assertRaises(ValueError, _validate_input_0_1, value, label)

    def test__validate_input_bool(self):
        """A string, ``None`` and an integer must each be rejected with ``ValueError``."""
        # Raises an exception if the input is not boolean
        for not_a_bool in ('True', None, 1):
            self.assertRaises(ValueError, _validate_input_bool, not_a_bool, None)


1			import numpy as np
2			import pandas as pd
3			import unittest
4			from klib.utils import _corr_selector
5			from klib.utils import _drop_duplicates
6			from klib.utils import _missing_vals
7			from klib.utils import _validate_input_0_1
8			from klib.utils import _validate_input_bool
9
10			if __name__ == '__main__':
11			unittest.main()
12
13
14			class Test__corr_selector(unittest.TestCase):
15
16			@classmethod
17			def setUpClass(cls):
18			cls.df_data_corr = pd.DataFrame([[1, 7, 2, 2, 4, 7],
19			[3, 8, 3, 3, 7, 1],
20			[5, 7, 9, 5, 1, 4],
21			[1, 7, 8, 6, 1, 8],
22			[1, 7, 5, 6, 2, 6],
23			[2, 7, 3, 3, 5, 3]])
24
25			cls.target = pd.Series([1, 2, 4, 7, 4, 2])
26
27			def test__corr_selector_matrix(self):
28			self.assertEqual(_corr_selector(self.df_data_corr.corr()).shape, (6, 6))
29			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='pos').isna().sum().sum(), 18)
30			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='pos', threshold=0.5).isna().sum().sum(), 26)
31			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='neg', threshold=-0.75).isna().sum().sum(), 32)
32			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='high', threshold=0.15).isna().sum().sum(), 4)
33			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='low', threshold=0.85).isna().sum().sum(), 6)
34
35			def test__corr_selector_label(self):
36			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(self.target)).shape, (6, ))
37			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(self.target), split='pos').isna().sum(), 3)
38			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
39			self.target), split='pos', threshold=0.8).isna().sum(), 4)
40			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
41			self.target), split='neg', threshold=-0.7).isna().sum(), 5)
42			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
43			self.target), split='high', threshold=0.2).isna().sum(), 1)
44			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
45			self.target), split='low', threshold=0.8).isna().sum(), 2)
46
47
48			class Test__drop_duplicates(unittest.TestCase):
49
50			@classmethod
51			def setUpClass(cls):
52			cls.data_dupl_df = pd.DataFrame([[pd.NA, pd.NA, pd.NA, pd.NA],
53			[1, 2, 3, 4],
54			[1, 2, 3, 4],
55			[1, 2, 3, 4],
56			[2, 3, 4, 5],
57			[1, 2, 3, pd.NA],
58			[pd.NA, pd.NA, pd.NA, pd.NA]])
59
60			def test__drop_dupl(self):
61			# Test dropping of duplicate rows
62			self.assertAlmostEqual(_drop_duplicates(self.data_dupl_df)[0].shape, (4, 4))
63			# Test if the resulting DataFrame is equal to using the pandas method
64			self.assertTrue(_drop_duplicates(self.data_dupl_df)[0].equals(self.data_dupl_df.drop_duplicates()))
65			# Test number of duplicates
66			self.assertEqual(len(_drop_duplicates(self.data_dupl_df)[1]), 3)
67
68
69			class Test__missing_vals(unittest.TestCase):
70
71			@classmethod
72			def setUpClass(cls):
73			cls.data_mv_df = pd.DataFrame([[1, np.nan, 3, 4],
74			[None, 4, 5, None],
75			['a', 'b', pd.NA, 'd'],
76			[True, False, 7, pd.NaT]])
77
78			cls.data_mv_array = np.array([[1, np.nan, 3, 4],
79			[None, 4, 5, None],
80			['a', 'b', pd.NA, 'd'],
81			[True, False, 7, pd.NaT]])
82
83			cls.data_mv_list = [[1, np.nan, 3, 4],
84			[None, 4, 5, None],
85			['a', 'b', pd.NA, 'd'],
86			[True, False, 7, pd.NaT]]
87
88			def test_mv_total(self):
89			# Test total missing values
90			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_total'], 5)
91			self.assertAlmostEqual(_missing_vals(self.data_mv_array)['mv_total'], 5)
92			self.assertAlmostEqual(_missing_vals(self.data_mv_list)['mv_total'], 5)
93
94			def test_mv_rows(self):
95			# Test missing values for each row
96			expected_results = [1, 2, 1, 1]
97			for i, _ in enumerate(expected_results):
98			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows'][i], expected_results[i])
99
100			def test_mv_cols(self):
101			# Test missing values for each column
102			expected_results = [1, 1, 1, 2]
103			for i, _ in enumerate(expected_results):
104			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols'][i], expected_results[i])
105
106			def test_mv_rows_ratio(self):
107			# Test missing values ratio for each row
108			expected_results = [0.25, 0.5, 0.25, 0.25]
109			for i, _ in enumerate(expected_results):
110			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows_ratio'][i], expected_results[i])
111
112			# Test if missing value ratio is between 0 and 1
113			for i in range(len(self.data_mv_df)):
114			self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_rows_ratio'][i] <= 1)
115
116			def test_mv_cols_ratio(self):
117			# Test missing values ratio for each column
118			expected_results = [1/4, 0.25, 0.25, 0.5]
119			for i, _ in enumerate(expected_results):
120			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols_ratio'][i], expected_results[i])
121
122			# Test if missing value ratio is between 0 and 1
123			for i in range(len(self.data_mv_df)):
124			self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_cols_ratio'][i] <= 1)
125
126
127			class Test__validate_input(unittest.TestCase):
128
129			def test__validate_input_0_1(self):
130			with self.assertRaises(ValueError):
131			_validate_input_0_1(-0.1, '-0.1')
132
133			with self.assertRaises(ValueError):
134			_validate_input_0_1(1.1, '1.1')
135
136			def test__validate_input_bool(self):
137			# Raises an exception if the input is not boolean
138			with self.assertRaises(ValueError):
139			_validate_input_bool('True', None)
140			with self.assertRaises(ValueError):
141			_validate_input_bool(None, None)
142			with self.assertRaises(ValueError):
143			_validate_input_bool(1, None)
144

akanz1 / klib

GitHub Access Token became invalid

Push — master ( b8c70d...3ac3d9 )

Test__missing_vals.test_mv_cols() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like