GitHub Access Token became invalid

It seems that the GitHub access token used to retrieve details about this repository has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Branch master (5deb01)
by Andreas
02:32
created

klib.tests.test_preprocess   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 58
Duplicated Lines 56.9 %

Importance

Changes 0
Metric Value
eloc 46
dl 33
loc 58
rs 10
c 0
b 0
f 0
wmc 9

3 Methods

Rating   Name   Duplication   Size   Complexity  
A Test_train_dev_test_split.setUpClass() 0 14 1
A Test_train_dev_test_split.test_train_dev_test_split_series() 17 17 4
A Test_train_dev_test_split.test_train_dev_test_split_col() 16 17 4

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
import pandas as pd
2
import unittest
3
from ..preprocess import train_dev_test_split
4
5
6
class Test_train_dev_test_split(unittest.TestCase):
    """Shape checks for ``train_dev_test_split``.

    The target is supplied either as a column name of the fixture frame or
    as a separate ``pd.Series``; only the shapes of the returned parts are
    verified.
    """

    @classmethod
    def setUpClass(cls):
        """Create the shared fixture: a 10-row, 5-column frame and a 10-element target."""
        cls.data_split = pd.DataFrame(
            [
                [1, 2, 3, 4, 'a'],
                [2, 4, 5, 6, 'b'],
                [3, 4, 2, 1, 'c'],
                [4, 0, 3, 4, 'a'],
                [5, 4, 5, 6, 'b'],
                [6, 4, 2, 1, 'c'],
                [7, 0, 3, 4, 'a'],
                [8, 4, 5, 6, 'b'],
                [9, 4, 2, 1, 'c'],
                [10, 2, 1, 5, 'b'],
            ],
            columns=['Col1', 'Col2', 'Col3', 'Col4', 'Col5'],
        )
        cls.data_target = pd.Series([1, 0, 1, 0, 0, 1, 1, 0, 1, 1])

    def _assert_split_shapes(self, expected_shapes, *args, **kwargs):
        """Split the fixture frame once and compare result shapes by position.

        Args:
            expected_shapes: Expected ``.shape`` tuples, in the order the
                parts are returned by ``train_dev_test_split``.
            *args: Extra positional arguments forwarded after the data frame.
            **kwargs: Keyword arguments forwarded unchanged.

        Only the first ``len(expected_shapes)`` returned parts are inspected.
        A result with fewer parts raises ``IndexError``.
        """
        # One call per scenario: the split does not depend on the index being
        # checked, so there is no reason to redo it for every expected shape.
        splits = train_dev_test_split(self.data_split, *args, **kwargs)
        for index, expected_shape in enumerate(expected_shapes):
            self.assertEqual(
                splits[index].shape,
                expected_shape,
                msg='unexpected shape for returned part {}'.format(index),
            )

    def test_train_dev_test_split_col(self):
        # Target given as a column name; the expected feature shapes have
        # four columns, one fewer than the five-column fixture.

        # Sizes left at the function's defaults, target passed positionally.
        self._assert_split_shapes(
            [(8, 4), (1, 4), (1, 4), (8,), (1,), (1,)],
            'Col2', random_state=1234)

        # dev_size=0: four parts are checked instead of six.
        self._assert_split_shapes(
            [(8, 4), (2, 4), (8,), (2,)],
            target='Col2', dev_size=0, test_size=0.2)

        # test_size=0: four parts are checked instead of six.
        self._assert_split_shapes(
            [(5, 4), (5, 4), (5,), (5,)],
            target='Col2', dev_size=0.5, test_size=0)

    def test_train_dev_test_split_series(self):
        # Target given as a separate Series; the expected feature shapes keep
        # all five fixture columns.

        self._assert_split_shapes(
            [(6, 5), (2, 5), (2, 5), (6,), (2,), (2,)],
            target=self.data_target, dev_size=0.2, test_size=0.2)

        # dev_size=0: four parts are checked instead of six.
        self._assert_split_shapes(
            [(8, 5), (2, 5), (8,), (2,)],
            target=self.data_target, dev_size=0, test_size=0.2)

        # test_size=0: four parts are checked instead of six.
        self._assert_split_shapes(
            [(5, 5), (5, 5), (5,), (5,)],
            target=self.data_target, dev_size=0.5, test_size=0)
58