GitHub Access Token became invalid

It seems that the GitHub access token used to retrieve details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Branch master (5deb01)
by Andreas
02:32
created

Test_pool_duplicate_subsets.setUpClass()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 8
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
import numpy as np
2
import pandas as pd
3
import unittest
4
from ..clean import (drop_missing,
5
                     convert_datatypes,
6
                     pool_duplicate_subsets)
7
8
9
class Test_drop_missing(unittest.TestCase):
    """Shape checks for ``drop_missing`` with and without drop thresholds."""

    @classmethod
    def setUpClass(cls):
        """Build the shared 6x5 fixture once for all tests in this class.

        Rows 0 and 1 contain only missing values, and column 0 is missing in
        every row; the remaining cells mix strings, integers and ``pd.NA``.
        """
        missing = pd.NA
        rows = [
            [np.nan] * 5,
            [missing] * 5,
            [missing, 'b', 'c', 'd', 'e'],
            [missing, 6, 7, 8, 9],
            [missing, 2, 3, 4, missing],
            [missing, 6, 7, missing, missing],
        ]
        cls.df_data_drop = pd.DataFrame(rows)

    def test_drop_missing(self):
        """Compare the resulting shape for each threshold configuration."""
        # Each entry: (keyword arguments for drop_missing, expected shape).
        cases = (
            ({}, (4, 4)),
            # Drop further columns based on threshold
            ({'drop_threshold_cols': 0.5}, (4, 4)),
            ({'drop_threshold_cols': 0.49}, (4, 3)),
            ({'drop_threshold_cols': 0}, (4, 2)),
            # Drop further rows based on threshold
            ({'drop_threshold_rows': 0.5}, (4, 4)),
            ({'drop_threshold_rows': 0.49}, (3, 4)),
            ({'drop_threshold_rows': 0}, (2, 4)),
        )
        for options, expected_shape in cases:
            cleaned = drop_missing(self.df_data_drop, **options)
            self.assertEqual(cleaned.shape, expected_shape)
32
33
34
class Test_convert_dtypes(unittest.TestCase):
    """Dtype checks for ``convert_datatypes`` under several category settings."""

    @classmethod
    def setUpClass(cls):
        """Build the shared six-row, six-column fixture once for the class.

        Column 0 holds integers, column 1 floats, columns 2 and 3 strings,
        column 4 is ``pd.NA`` in every row, and column 5 holds only 'v' or 'p'.
        """
        cls.df_data_convert = pd.DataFrame([[1, 7.0, 'y', 'x', pd.NA, 'v'],
                                            [3, 8.0, 'd', 'e', pd.NA, 'v'],
                                            [5, 7.0, 'o', 'z', pd.NA, 'v'],
                                            [1, 7.0, 'u', 'f', pd.NA, 'p'],
                                            [1, 7.0, 'u', 'f', pd.NA, 'p'],
                                            [2, 7.0, 'g', 'a', pd.NA, 'p']])

    def _assert_dtypes(self, expected_results, **kwargs):
        """Convert the fixture once and compare every column's dtype.

        Parameters
        ----------
        expected_results : list of str
            Expected dtype name for each column, in column order.
        **kwargs
            Keyword arguments forwarded unchanged to ``convert_datatypes``.
        """
        # The conversion does not depend on the column being checked, so run
        # it once per configuration instead of once per column.
        # NOTE(review): assumes convert_datatypes leaves its input unmodified,
        # so a single call is equivalent to the repeated calls - confirm.
        dtypes = convert_datatypes(self.df_data_convert, **kwargs).dtypes
        for column, expected in enumerate(expected_results):
            # Label-based lookup; the fixture's column labels are 0..5.
            self.assertEqual(dtypes[column], expected)

    def test_convert_dtypes(self):
        """Check the resulting dtypes for each combination of options."""
        self._assert_dtypes(
            ['Int8', 'Float32', 'string', 'string', 'category', 'category'],
            cat_threshold=0.4)

        # Default arguments
        self._assert_dtypes(
            ['Int8', 'Float32', 'string', 'string', 'object', 'string'])

        self._assert_dtypes(
            ['Int8', 'Float32', 'string', 'string', 'object', 'category'],
            cat_threshold=0.5, cat_exclude=[4])

        self._assert_dtypes(
            ['Int8', 'Float32', 'string', 'category', 'object', 'category'],
            cat_threshold=0.95, cat_exclude=[2, 4])

        # With category=False no column is expected to become categorical.
        self._assert_dtypes(
            ['Int8', 'Float32', 'string', 'string', 'object', 'string'],
            category=False, cat_threshold=0.95, cat_exclude=[2, 4])
68
69
70
class Test_pool_duplicate_subsets(unittest.TestCase):
    """Shape checks for ``pool_duplicate_subsets`` with different thresholds."""

    @classmethod
    def setUpClass(cls):
        """Build the shared six-row, six-column fixture once for the class.

        Column 4 is ``pd.NA`` in every row; the other columns hold repeated
        integer and string values.
        """
        missing = pd.NA
        records = [
            [1, 7, 'd', 'x', missing, 'v'],
            [1, 8, 'd', 'e', missing, 'v'],
            [2, 7, 'g', 'z', missing, 'v'],
            [1, 7, 'u', 'f', missing, 'p'],
            [1, 7, 'u', 'z', missing, 'p'],
            [2, 7, 'g', 'z', missing, 'p'],
        ]
        cls.df_data_subsets = pd.DataFrame(records)

    def test_pool_duplicate_subsets(self):
        """Compare the resulting shape for each threshold configuration."""
        # Each entry: (keyword arguments for pool_duplicate_subsets, expected shape).
        cases = (
            ({}, (6, 3)),
            ({'col_dupl_thresh': 1}, (6, 6)),
            ({'subset_thresh': 0}, (6, 2)),
        )
        for options, expected_shape in cases:
            pooled = pool_duplicate_subsets(self.df_data_subsets, **options)
            self.assertEqual(pooled.shape, expected_shape)
85