klib.tests.test_util.Test__validate_input.test__validate_input_smaller() - Code Metrics - Inspection of "minor fixes and check for input range" - akanz1/klib - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 2625ff...cc4c68 )

by Andreas

created 2020-04-25 11:02 UTC

Test__validate_input.test__validate_input_smaller() A

↳ Parent: klib.tests.test_util

Complexity

Conditions

Size

Total Lines	8
Code Lines	7

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	4
eloc	7
nop	1
dl	0
loc	8
rs	10
c	0
b	0
f	0

import numpy as np
import pandas as pd
import unittest
from klib.utils import _corr_selector
from klib.utils import _drop_duplicates
from klib.utils import _missing_vals
from klib.utils import _validate_input_bool
from klib.utils import _validate_input_int
from klib.utils import _validate_input_range
from klib.utils import _validate_input_smaller


# Script entry point: start the unittest runner when this file is executed directly.
# NOTE(review): this guard sits above the test classes, so when the file is run as
# a script unittest.main() is invoked before any TestCase below has been defined —
# presumably no tests are collected that way; consider moving it to the end of the file.
if __name__ == '__main__':
    unittest.main()


class Test__corr_selector(unittest.TestCase):
    """Checks ``_corr_selector`` on a correlation matrix and on a label correlation."""

    @classmethod
    def setUpClass(cls):
        # Fixed 6x6 integer frame; every expected count below is tied to these values.
        rows = [
            [1, 7, 2, 2, 4, 7],
            [3, 8, 3, 3, 7, 1],
            [5, 7, 9, 5, 1, 4],
            [1, 7, 8, 6, 1, 8],
            [1, 7, 5, 6, 2, 6],
            [2, 7, 3, 3, 5, 3],
        ]
        cls.df_data_corr = pd.DataFrame(rows)

        # Label series the columns are correlated against in the label test.
        cls.target = pd.Series([1, 2, 4, 7, 4, 2])

    def test__corr_selector_matrix(self):
        """Default call keeps the (6, 6) shape; each option set yields a known NaN count."""
        unfiltered = _corr_selector(self.df_data_corr.corr())
        self.assertEqual(unfiltered.shape, (6, 6))

        # (keyword arguments, expected number of NaN cells in the result)
        expectations = (
            ({'split': 'pos'}, 18),
            ({'split': 'pos', 'threshold': 0.5}, 26),
            ({'split': 'neg', 'threshold': -0.75}, 32),
            ({'split': 'high', 'threshold': 0.15}, 4),
            ({'split': 'low', 'threshold': 0.85}, 6),
        )
        for options, nan_cells in expectations:
            # The correlation matrix is rebuilt for every call, as a fresh input.
            selected = _corr_selector(self.df_data_corr.corr(), **options)
            self.assertEqual(selected.isna().sum().sum(), nan_cells)

    def test__corr_selector_label(self):
        """Default call keeps the (6, ) shape; each option set yields a known NaN count."""
        unfiltered = _corr_selector(self.df_data_corr.corrwith(self.target))
        self.assertEqual(unfiltered.shape, (6, ))

        # (keyword arguments, expected number of NaN entries in the result)
        expectations = (
            ({'split': 'pos'}, 3),
            ({'split': 'pos', 'threshold': 0.8}, 4),
            ({'split': 'neg', 'threshold': -0.7}, 5),
            ({'split': 'high', 'threshold': 0.2}, 1),
            ({'split': 'low', 'threshold': 0.8}, 2),
        )
        for options, nan_entries in expectations:
            label_corr = self.df_data_corr.corrwith(self.target)
            selected = _corr_selector(label_corr, **options)
            self.assertEqual(selected.isna().sum(), nan_entries)


class Test__drop_duplicates(unittest.TestCase):
    """Checks ``_drop_duplicates`` on a frame with repeated and all-missing rows."""

    @classmethod
    def setUpClass(cls):
        # 7x4 frame: the [1, 2, 3, 4] row appears three times and the all-NA row twice.
        cls.data_dupl_df = pd.DataFrame([[pd.NA, pd.NA, pd.NA, pd.NA],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [1, 2, 3, 4],
                                         [2, 3, 4, 5],
                                         [1, 2, 3, pd.NA],
                                         [pd.NA, pd.NA, pd.NA, pd.NA]])

    def test__drop_dupl(self):
        """Element 0 of the result is the de-duplicated frame, element 1 the duplicates."""
        result = _drop_duplicates(self.data_dupl_df)
        deduplicated = result[0]
        duplicates = result[1]

        # Test dropping of duplicate rows. A shape is a tuple, so compare it exactly:
        # assertAlmostEqual would attempt tuple subtraction on a mismatch and raise
        # TypeError instead of reporting a readable assertion failure.
        self.assertEqual(deduplicated.shape, (4, 4))
        # Test if the resulting DataFrame is equal to using the pandas method
        self.assertTrue(deduplicated.equals(self.data_dupl_df.drop_duplicates()))
        # Test number of duplicates
        self.assertEqual(len(duplicates), 3)


class Test__missing_vals(unittest.TestCase):
    """Checks the counts and ratios reported by ``_missing_vals`` for a 4x4 fixture."""

    @classmethod
    def setUpClass(cls):
        # The same 4x4 data is provided as a nested list, a DataFrame and a NumPy array.
        # It holds five missing markers in total: np.nan, two None, pd.NA and pd.NaT.
        cls.data_mv_list = [[1, np.nan, 3, 4],
                            [None, 4, 5, None],
                            ['a', 'b', pd.NA, 'd'],
                            [True, False, 7, pd.NaT]]

        cls.data_mv_df = pd.DataFrame(cls.data_mv_list)

        cls.data_mv_array = np.array(cls.data_mv_list)

    def test_mv_total(self):
        """Total missing values is 5 for each of the three input containers."""
        for data in (self.data_mv_df, self.data_mv_array, self.data_mv_list):
            # Counts are whole numbers, so they are compared exactly.
            self.assertEqual(_missing_vals(data)['mv_total'], 5)

    def test_mv_rows(self):
        """Missing values for each row."""
        mv_rows = _missing_vals(self.data_mv_df)['mv_rows']
        for i, expected in enumerate([1, 2, 1, 1]):
            self.assertEqual(mv_rows[i], expected)

    def test_mv_cols(self):
        """Missing values for each column."""
        mv_cols = _missing_vals(self.data_mv_df)['mv_cols']
        for i, expected in enumerate([1, 1, 1, 2]):
            self.assertEqual(mv_cols[i], expected)

    def test_mv_rows_ratio(self):
        """Missing values ratio for each row, plus a 0..1 bounds check."""
        # Computed once: the result does not change between loop iterations.
        rows_ratio = _missing_vals(self.data_mv_df)['mv_rows_ratio']
        for i, expected in enumerate([0.25, 0.5, 0.25, 0.25]):
            self.assertAlmostEqual(rows_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1 (one entry per row)
        for i in range(self.data_mv_df.shape[0]):
            self.assertTrue(0 <= rows_ratio[i] <= 1)

    def test_mv_cols_ratio(self):
        """Missing values ratio for each column, plus a 0..1 bounds check."""
        cols_ratio = _missing_vals(self.data_mv_df)['mv_cols_ratio']
        for i, expected in enumerate([1/4, 0.25, 0.25, 0.5]):
            self.assertAlmostEqual(cols_ratio[i], expected)

        # Test if missing value ratio is between 0 and 1. Iterate over the number of
        # columns (shape[1]); using the row count here only worked because the
        # fixture happens to be square.
        for i in range(self.data_mv_df.shape[1]):
            self.assertTrue(0 <= cols_ratio[i] <= 1)


class Test__validate_input(unittest.TestCase):
    """Checks that the ``_validate_input_*`` helpers reject invalid arguments."""

    def test__validate_input_bool(self):
        """Non-boolean inputs must raise TypeError."""
        for not_a_bool in ('True', None, 1):
            with self.assertRaises(TypeError):
                _validate_input_bool(not_a_bool, None)

    def test__validate_input_int(self):
        """Non-integer inputs (float, bool, list, str) must raise TypeError."""
        for not_an_int in (1.1, True, [1], '1'):
            with self.assertRaises(TypeError):
                _validate_input_int(not_an_int, None)

    def test__validate_input_smaller(self):
        """A first value larger than the second must raise ValueError."""
        for larger, smaller in ((0.3, 0.2), (3, 2), (5, -3)):
            with self.assertRaises(ValueError):
                _validate_input_smaller(larger, smaller, None)

    def test__validate_input_range(self):
        """Values outside the given 0..1 bounds raise ValueError; a string raises TypeError."""
        for out_of_range, label in ((-0.1, 'value -0.1'), (1.1, 'value 1.1')):
            with self.assertRaises(ValueError):
                _validate_input_range(out_of_range, label, 0, 1)

        with self.assertRaises(TypeError):
            _validate_input_range('1', 'value string', 0, 1)


1			import numpy as np
2			import pandas as pd
3			import unittest
4			from klib.utils import _corr_selector
5			from klib.utils import _drop_duplicates
6			from klib.utils import _missing_vals
7			from klib.utils import _validate_input_bool
8			from klib.utils import _validate_input_int
9			from klib.utils import _validate_input_range
10			from klib.utils import _validate_input_smaller
11
12
13			if __name__ == '__main__':
14			unittest.main()
15
16
17			class Test__corr_selector(unittest.TestCase):
18
19			@classmethod
20			def setUpClass(cls):
21			cls.df_data_corr = pd.DataFrame([[1, 7, 2, 2, 4, 7],
22			[3, 8, 3, 3, 7, 1],
23			[5, 7, 9, 5, 1, 4],
24			[1, 7, 8, 6, 1, 8],
25			[1, 7, 5, 6, 2, 6],
26			[2, 7, 3, 3, 5, 3]])
27
28			cls.target = pd.Series([1, 2, 4, 7, 4, 2])
29
30			def test__corr_selector_matrix(self):
31			self.assertEqual(_corr_selector(self.df_data_corr.corr()).shape, (6, 6))
32			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='pos').isna().sum().sum(), 18)
33			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='pos', threshold=0.5).isna().sum().sum(), 26)
34			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='neg', threshold=-0.75).isna().sum().sum(), 32)
35			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='high', threshold=0.15).isna().sum().sum(), 4)
36			self.assertEqual(_corr_selector(self.df_data_corr.corr(), split='low', threshold=0.85).isna().sum().sum(), 6)
37
38			def test__corr_selector_label(self):
39			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(self.target)).shape, (6, ))
40			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(self.target), split='pos').isna().sum(), 3)
41			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
42			self.target), split='pos', threshold=0.8).isna().sum(), 4)
43			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
44			self.target), split='neg', threshold=-0.7).isna().sum(), 5)
45			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
46			self.target), split='high', threshold=0.2).isna().sum(), 1)
47			self.assertEqual(_corr_selector(self.df_data_corr.corrwith(
48			self.target), split='low', threshold=0.8).isna().sum(), 2)
49
50
51			class Test__drop_duplicates(unittest.TestCase):
52
53			@classmethod
54			def setUpClass(cls):
55			cls.data_dupl_df = pd.DataFrame([[pd.NA, pd.NA, pd.NA, pd.NA],
56			[1, 2, 3, 4],
57			[1, 2, 3, 4],
58			[1, 2, 3, 4],
59			[2, 3, 4, 5],
60			[1, 2, 3, pd.NA],
61			[pd.NA, pd.NA, pd.NA, pd.NA]])
62
63			def test__drop_dupl(self):
64			# Test dropping of duplicate rows
65			self.assertAlmostEqual(_drop_duplicates(self.data_dupl_df)[0].shape, (4, 4))
66			# Test if the resulting DataFrame is equal to using the pandas method
67			self.assertTrue(_drop_duplicates(self.data_dupl_df)[0].equals(self.data_dupl_df.drop_duplicates()))
68			# Test number of duplicates
69			self.assertEqual(len(_drop_duplicates(self.data_dupl_df)[1]), 3)
70
71
72			class Test__missing_vals(unittest.TestCase):
73
74			@classmethod
75			def setUpClass(cls):
76			cls.data_mv_list = [[1, np.nan, 3, 4],
77			[None, 4, 5, None],
78			['a', 'b', pd.NA, 'd'],
79			[True, False, 7, pd.NaT]]
80
81			cls.data_mv_df = pd.DataFrame(cls.data_mv_list)
82
83			cls.data_mv_array = np.array(cls.data_mv_list)
84
85			def test_mv_total(self):
86			# Test total missing values
87			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_total'], 5)
88			self.assertAlmostEqual(_missing_vals(self.data_mv_array)['mv_total'], 5)
89			self.assertAlmostEqual(_missing_vals(self.data_mv_list)['mv_total'], 5)
90
91			def test_mv_rows(self):
92			# Test missing values for each row
93			expected_results = [1, 2, 1, 1]
94			for i, _ in enumerate(expected_results):
95			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows'][i], expected_results[i])
96
97			def test_mv_cols(self):
98			# Test missing values for each column
99			expected_results = [1, 1, 1, 2]
100			for i, _ in enumerate(expected_results):
101			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols'][i], expected_results[i])
102
103			def test_mv_rows_ratio(self):
104			# Test missing values ratio for each row
105			expected_results = [0.25, 0.5, 0.25, 0.25]
106			for i, _ in enumerate(expected_results):
107			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_rows_ratio'][i], expected_results[i])
108
109			# Test if missing value ratio is between 0 and 1
110			for i in range(len(self.data_mv_df)):
111			self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_rows_ratio'][i] <= 1)
112
113			def test_mv_cols_ratio(self):
114			# Test missing values ratio for each column
115			expected_results = [1/4, 0.25, 0.25, 0.5]
116			for i, _ in enumerate(expected_results):
117			self.assertAlmostEqual(_missing_vals(self.data_mv_df)['mv_cols_ratio'][i], expected_results[i])
118
119			# Test if missing value ratio is between 0 and 1
120			for i in range(len(self.data_mv_df)):
121			self.assertTrue(0 <= _missing_vals(self.data_mv_df)['mv_cols_ratio'][i] <= 1)
122
123
124			class Test__validate_input(unittest.TestCase):
125
126			def test__validate_input_bool(self):
127			# Raises an exception if the input is not boolean
128			with self.assertRaises(TypeError):
129			_validate_input_bool('True', None)
130			with self.assertRaises(TypeError):
131			_validate_input_bool(None, None)
132			with self.assertRaises(TypeError):
133			_validate_input_bool(1, None)
134
135			def test__validate_input_int(self):
136			# Raises an exception if the input is not an integer
137			with self.assertRaises(TypeError):
138			_validate_input_int(1.1, None)
139			with self.assertRaises(TypeError):
140			_validate_input_int(True, None)
141			with self.assertRaises(TypeError):
142			_validate_input_int([1], None)
143			with self.assertRaises(TypeError):
144			_validate_input_int('1', None)
145
146			def test__validate_input_smaller(self):
147			# Raises an exception if the first value is larger than the second
148			with self.assertRaises(ValueError):
149			_validate_input_smaller(0.3, 0.2, None)
150			with self.assertRaises(ValueError):
151			_validate_input_smaller(3, 2, None)
152			with self.assertRaises(ValueError):
153			_validate_input_smaller(5, -3, None)
154
155			def test__validate_input_range(self):
156			with self.assertRaises(ValueError):
157			_validate_input_range(-0.1, 'value -0.1', 0, 1)
158
159			with self.assertRaises(ValueError):
160			_validate_input_range(1.1, 'value 1.1', 0, 1)
161
162			with self.assertRaises(TypeError):
163			_validate_input_range('1', 'value string', 0, 1)
164

akanz1 / klib

GitHub Access Token became invalid

Push — master ( 2625ff...cc4c68 )

Test__validate_input.test__validate_input_smaller() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like