1
|
|
|
import numpy as np |
2
|
|
|
import pandas as pd |
3
|
|
|
import unittest |
4
|
|
|
from klib.clean import drop_missing |
5
|
|
|
|
6
|
|
|
class Test_drop_missing(unittest.TestCase):
    """Shape-based checks for ``drop_missing`` on a small, sparse frame."""

    @classmethod
    def setUpClass(cls):
        """Build the shared 6x5 fixture once for the whole test class.

        Rows 0 and 1 and column 0 contain only missing values (``np.nan`` /
        ``pd.NA``); the remaining 4x4 region holds one missing value in
        column 3 and two in column 4.
        """
        cls.df_data_drop = pd.DataFrame(
            [
                [np.nan, np.nan, np.nan, np.nan, np.nan],
                [pd.NA, pd.NA, pd.NA, pd.NA, pd.NA],
                [pd.NA, 'b', 'c', 'd', 'e'],
                [pd.NA, 6, 7, 8, 9],
                [pd.NA, 2, 3, 4, pd.NA],
                [pd.NA, 6, 7, pd.NA, pd.NA],
            ]
        )

    def test_drop_missing(self):
        """Check the resulting shape for default and threshold-based calls."""
        # Default call: expected shape is consistent with the two all-missing
        # rows and the single all-missing column being removed.
        self.assertEqual(drop_missing(self.df_data_drop).shape, (4, 4))

        # Drop further columns based on threshold
        self.assertEqual(
            drop_missing(self.df_data_drop, drop_threshold_cols=0.5).shape,
            (4, 4),
        )
        self.assertEqual(
            drop_missing(self.df_data_drop, drop_threshold_cols=0.49).shape,
            (4, 3),
        )
        self.assertEqual(
            drop_missing(self.df_data_drop, drop_threshold_cols=0).shape,
            (4, 2),
        )

        # Drop further rows based on threshold
        self.assertEqual(
            drop_missing(self.df_data_drop, drop_threshold_rows=0.5).shape,
            (4, 4),
        )
        self.assertEqual(
            drop_missing(self.df_data_drop, drop_threshold_rows=0.49).shape,
            (3, 4),
        )
        self.assertEqual(
            drop_missing(self.df_data_drop, drop_threshold_rows=0).shape,
            (2, 4),
        )


# The entry guard must come after the test class: unittest.main() collects
# tests from the module as it exists at call time and then exits, so placing
# it above the class would run zero tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
33
|
|
|
|