GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Test Failed
Pull Request — main (#17)
by Andreas
06:37
created

tests.test_preprocess   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 89
Duplicated Lines 66.29 %

Importance

Changes 0
Metric Value
eloc 69
dl 59
loc 89
rs 10
c 0
b 0
f 0
wmc 9

3 Methods

Rating   Name   Duplication   Size   Complexity  
A Test_train_dev_test_split.test_train_dev_test_split_series() 31 31 4
A Test_train_dev_test_split.setUpClass() 0 18 1
A Test_train_dev_test_split.test_train_dev_test_split_col() 28 28 4

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
import unittest
2
3
import pandas as pd
4
5
from klib.preprocess import train_dev_test_split
6
7
8
class Test_train_dev_test_split(unittest.TestCase):
    """Shape checks for the splits returned by ``train_dev_test_split``.

    Every scenario splits the same 10-row fixture and compares the ``.shape``
    of each returned piece with a list of expected shapes.
    """

    @classmethod
    def setUpClass(cls):
        """Create the fixtures shared by all tests.

        ``data_split`` is a 10-row frame with columns ``Col1`` .. ``Col5``;
        ``data_target`` is a separate 10-element series used as an external
        target.
        """
        cls.data_split = pd.DataFrame(
            [
                [1, 2, 3, 4, "a"],
                [2, 4, 5, 6, "b"],
                [3, 4, 2, 1, "c"],
                [4, 0, 3, 4, "a"],
                [5, 4, 5, 6, "b"],
                [6, 4, 2, 1, "c"],
                [7, 0, 3, 4, "a"],
                [8, 4, 5, 6, "b"],
                [9, 4, 2, 1, "c"],
                [10, 2, 1, 5, "b"],
            ],
            columns=["Col1", "Col2", "Col3", "Col4", "Col5"],
        )
        cls.data_target = pd.Series([1, 0, 1, 0, 0, 1, 1, 0, 1, 1])

    def _assert_split_shapes(self, expected_shapes, *args, **kwargs):
        """Split the fixture once and compare the shapes of the pieces.

        Args:
            expected_shapes: Expected ``.shape`` tuples, in the order the
                pieces are returned by ``train_dev_test_split``.
            *args: Extra positional arguments forwarded to
                ``train_dev_test_split`` after ``self.data_split``.
            **kwargs: Keyword arguments forwarded to ``train_dev_test_split``.
        """
        # The call does not depend on the index being checked, so run it once
        # per scenario instead of once per expected shape.
        splits = train_dev_test_split(self.data_split, *args, **kwargs)
        for index, expected in enumerate(expected_shapes):
            # Index (rather than zip) so that a result with fewer pieces than
            # expected still raises instead of being silently truncated.
            self.assertEqual(
                splits[index].shape,
                expected,
                msg=f"unexpected shape for split piece {index}",
            )

    def test_train_dev_test_split_col(self):
        """Target given as a column name of the fixture frame."""
        # Test the proper splitting in train, dev and test sets.
        # The expected feature frames have 4 columns, one fewer than the
        # 5-column fixture.
        self._assert_split_shapes(
            [(8, 4), (1, 4), (1, 4), (8,), (1,), (1,)],
            "Col2",
            random_state=1234,
        )

        # dev_size=0: only four pieces are expected.
        self._assert_split_shapes(
            [(8, 4), (2, 4), (8,), (2,)],
            target="Col2",
            dev_size=0,
            test_size=0.2,
        )

        # test_size=0: only four pieces are expected.
        self._assert_split_shapes(
            [(5, 4), (5, 4), (5,), (5,)],
            target="Col2",
            dev_size=0.5,
            test_size=0,
        )

    def test_train_dev_test_split_series(self):
        """Target given as a separate ``pd.Series``."""
        # Test the proper splitting in train, dev and test sets.
        # The expected feature frames keep all 5 fixture columns.
        self._assert_split_shapes(
            [(6, 5), (2, 5), (2, 5), (6,), (2,), (2,)],
            target=self.data_target,
            dev_size=0.2,
            test_size=0.2,
        )

        # dev_size=0: only four pieces are expected.
        self._assert_split_shapes(
            [(8, 5), (2, 5), (8,), (2,)],
            target=self.data_target,
            dev_size=0,
            test_size=0.2,
        )

        # test_size=0: only four pieces are expected.
        self._assert_split_shapes(
            [(5, 5), (5, 5), (5,), (5,)],
            target=self.data_target,
            dev_size=0.5,
            test_size=0,
        )
90