GitHub Access Token became invalid

It seems that the GitHub access token used to retrieve details about this repository from GitHub has become invalid. This might prevent certain types of inspections from running (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Branch master (5deb01)
by Andreas
02:32
created

klib.tests.test_clean   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 85
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 66
dl 0
loc 85
rs 10
c 0
b 0
f 0
wmc 11

6 Methods

Rating   Name   Duplication   Size   Complexity  
B Test_convert_dtypes.test_convert_dtypes() 0 23 6
A Test_drop_missing.setUpClass() 0 8 1
A Test_drop_missing.test_drop_missing() 0 12 1
A Test_convert_dtypes.setUpClass() 0 8 1
A Test_pool_duplicate_subsets.test_pool_duplicate_subsets() 0 4 1
A Test_pool_duplicate_subsets.setUpClass() 0 8 1
1
import numpy as np
2
import pandas as pd
3
import unittest
4
from ..clean import (drop_missing,
5
                     convert_datatypes,
6
                     pool_duplicate_subsets)
7
8
9
class Test_drop_missing(unittest.TestCase):
    """Shape checks for ``drop_missing`` on a frame with missing values."""

    @classmethod
    def setUpClass(cls):
        """Build the shared 6x5 fixture.

        Rows 0 and 1 and column 0 contain only missing values; the
        remaining cells mix strings, integers and ``pd.NA``.
        """
        missing = pd.NA
        rows = [
            [np.nan] * 5,
            [missing] * 5,
            [missing, 'b', 'c', 'd', 'e'],
            [missing, 6, 7, 8, 9],
            [missing, 2, 3, 4, missing],
            [missing, 6, 7, missing, missing],
        ]
        cls.df_data_drop = pd.DataFrame(rows)

    def test_drop_missing(self):
        """Compare the resulting shape for default and threshold arguments."""
        frame = self.df_data_drop

        # Default arguments.
        self.assertEqual(drop_missing(frame).shape, (4, 4))

        # Each entry: keyword arguments -> expected shape of the result.
        scenarios = (
            # Drop further columns based on threshold
            ({'drop_threshold_cols': 0.5}, (4, 4)),
            ({'drop_threshold_cols': 0.49}, (4, 3)),
            ({'drop_threshold_cols': 0}, (4, 2)),
            # Drop further rows based on threshold
            ({'drop_threshold_rows': 0.5}, (4, 4)),
            ({'drop_threshold_rows': 0.49}, (3, 4)),
            ({'drop_threshold_rows': 0}, (2, 4)),
        )
        for options, expected_shape in scenarios:
            cleaned = drop_missing(frame, **options)
            self.assertEqual(cleaned.shape, expected_shape)
32
33
34
class Test_convert_dtypes(unittest.TestCase):
    """Check the per-column dtypes returned by ``convert_datatypes``."""

    @classmethod
    def setUpClass(cls):
        """Build the shared 6x6 fixture.

        Columns 0-5 hold, in order: integers, floats, two string columns,
        a column containing only ``pd.NA`` and a two-valued string column.
        """
        cls.df_data_convert = pd.DataFrame([[1, 7.0, 'y', 'x', pd.NA, 'v'],
                                            [3, 8.0, 'd', 'e', pd.NA, 'v'],
                                            [5, 7.0, 'o', 'z', pd.NA, 'v'],
                                            [1, 7.0, 'u', 'f', pd.NA, 'p'],
                                            [1, 7.0, 'u', 'f', pd.NA, 'p'],
                                            [2, 7.0, 'g', 'a', pd.NA, 'p']])

    def _assert_dtypes(self, converted, expected_results):
        """Assert that the leading columns of *converted* have the given dtypes.

        Args:
            converted: Frame returned by ``convert_datatypes``.
            expected_results: Expected dtype names, one per column label
                ``0 .. len(expected_results) - 1``.
        """
        # Read the dtypes once; the conversion itself is done by the caller
        # a single time per scenario instead of once per column.
        dtypes = converted.dtypes
        for column, expected in enumerate(expected_results):
            # subTest labels a failure with the offending column.
            with self.subTest(column=column):
                self.assertEqual(dtypes[column], expected)

    def test_convert_dtypes(self):
        """Compare resulting dtypes for several category-related settings."""
        data = self.df_data_convert

        self._assert_dtypes(
            convert_datatypes(data, cat_threshold=0.4),
            ['Int8', 'Float32', 'string', 'string', 'category', 'category'])

        # Default arguments.
        self._assert_dtypes(
            convert_datatypes(data),
            ['Int8', 'Float32', 'string', 'string', 'object', 'string'])

        self._assert_dtypes(
            convert_datatypes(data, cat_threshold=0.5, cat_exclude=[4]),
            ['Int8', 'Float32', 'string', 'string', 'object', 'category'])

        self._assert_dtypes(
            convert_datatypes(data, cat_threshold=0.95, cat_exclude=[2, 4]),
            ['Int8', 'Float32', 'string', 'category', 'object', 'category'])

        # With category=False no column is expected to come back as category.
        self._assert_dtypes(
            convert_datatypes(data, category=False,
                              cat_threshold=0.95, cat_exclude=[2, 4]),
            ['Int8', 'Float32', 'string', 'string', 'object', 'string'])
68
69
70
class Test_pool_duplicate_subsets(unittest.TestCase):
    """Shape checks for ``pool_duplicate_subsets``."""

    @classmethod
    def setUpClass(cls):
        """Build the shared 6x6 fixture; column 4 contains only ``pd.NA``."""
        missing = pd.NA
        records = [
            [1, 7, 'd', 'x', missing, 'v'],
            [1, 8, 'd', 'e', missing, 'v'],
            [2, 7, 'g', 'z', missing, 'v'],
            [1, 7, 'u', 'f', missing, 'p'],
            [1, 7, 'u', 'z', missing, 'p'],
            [2, 7, 'g', 'z', missing, 'p'],
        ]
        cls.df_data_subsets = pd.DataFrame(records)

    def test_pool_duplicate_subsets(self):
        """Compare the resulting shape for default and threshold arguments."""
        frame = self.df_data_subsets

        # Each entry: keyword arguments -> expected shape of the result.
        scenarios = (
            ({}, (6, 3)),
            ({'col_dupl_thresh': 1}, (6, 6)),
            ({'subset_thresh': 0}, (6, 2)),
        )
        for options, expected_shape in scenarios:
            pooled = pool_duplicate_subsets(frame, **options)
            self.assertEqual(pooled.shape, expected_shape)
85