Test Failed
Push — master ( ffcb6f...64434e )
by Chad
03:11
created

test_pca.test_build_KNN_model()   A

Complexity

Conditions 1

Size

Total Lines 15
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 13
nop 0
dl 0
loc 15
rs 9.75
c 0
b 0
f 0
1
import os
2
import pytest
3
import numpy as np
4
import numpy.testing as npt
5
import pandas as pd
6
import diff_classifier.msd as msd
7
import diff_classifier.pca as pca
8
import diff_classifier.features as ft
9
10
# True when a "CI" variable is present in the process environment.
is_travis = "CI" in os.environ
11
12
13
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
14
@pytest.mark.xfail
def test_partial_corr():
    """Compare the rounded sum of ``pca.partial_corr`` output to reference values.

    Every case generates a dataset with ``msd.random_traj_dataset``, computes
    MSDs and features from it, and checks the sum of the partial-correlation
    result rounded to one decimal place.
    """
    # (random_traj_dataset keyword arguments, frames for all_msds2, expected sum)
    cases = (
        ({}, 100, 24.0),
        ({'nparts': 10}, 100, 47.9),
        ({'nparts': 10, 'seed': 9}, 100, 33.4),
        ({'nparts': 10, 'nframes': 40, 'seed': 9}, 40, 17.4),
        ({'nparts': 10, 'nframes': 40, 'ndist': (3, 5), 'seed': 9}, 40, 35.7),
    )

    for traj_kwargs, nframes, expected in cases:
        trajectories = msd.random_traj_dataset(**traj_kwargs)
        displacements = msd.all_msds2(trajectories, frames=nframes)
        features = ft.calculate_features(displacements)
        total = np.sum(pca.partial_corr(features))
        npt.assert_equal(expected, np.round(total, 1))
45
46
47
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
48
@pytest.mark.xfail
def test_kmo():
    """Check the summed correlation matrix of the computed feature columns.

    NOTE(review): despite its name, this test does not call any KMO routine;
    it only sums ``np.corrcoef`` over the features. Confirm that is intended.
    """
    trajectories = msd.random_traj_dataset(nparts=10, ndist=(1, 1), seed=3)
    displacements = msd.all_msds2(trajectories, frames=100)
    features = ft.calculate_features(displacements)

    # Drop the bookkeeping columns before correlating the remaining ones.
    numeric = features.drop(['frames', 'Track_ID'], axis=1)
    corr_total = np.sum(np.corrcoef(numeric.transpose()))

    npt.assert_equal(np.round(corr_total, 1), 7.3)
56
57
58
def test_pca_analysis():
    """Check the rounded sum of the components returned by ``pca.pca_analysis``."""
    trajectories = msd.random_traj_dataset(nparts=10, ndist=(2, 6))
    displacements = msd.all_msds2(trajectories, frames=100)
    features = ft.calculate_features(displacements)

    result = pca.pca_analysis(features, n_components=5,
                              dropcols=['frames', 'Track_ID'])
    component_total = np.sum(result.components.values)

    npt.assert_equal(np.round(component_total, 3), 0.400)
66
67
68
def test_plot_pca():
    """Placeholder test: calls nothing from ``pca`` and only prints an empty line."""
    print()
70
71
72
def test_build_KNN_model():
    """Check the shapes of the arrays returned by ``pca.build_KNN_model``.

    The input is an unseeded synthetic frame with 1000 'F' rows followed by
    1000 'M' rows and two normally distributed columns; only shapes are
    asserted, so the random values themselves do not matter.
    """
    per_class = 1000
    labels = ['F'] * per_class + ['M'] * per_class

    # Draw order matches the original: column 0 ('F' then 'M'), then column 1.
    column0 = np.append(np.random.normal(1, 1, size=per_class),
                        np.random.normal(2, 1, size=per_class))
    column1 = np.append(np.random.normal(0.1, 0.1, size=per_class),
                        np.random.normal(0.2, 0.1, size=per_class))
    frame = pd.DataFrame({'output': labels, 0: column0, 1: column1})

    _model, X, Y = pca.build_KNN_model(frame, 'output', ['F', 'M'],
                                       equal_sampling=False, tsize=25,
                                       n_neighbors=5, input_cols=2)

    assert X.shape == (25, 2)
    assert Y.shape == (25,)
87
88
89
def test_predict_KNN():
    """Check mean ``pca.predict_KNN`` results on two synthetic two-class frames.

    Both scenarios use 1000 'F' rows followed by 1000 'M' rows. In the first
    the class means are close together and the mean result must exceed 0.6;
    in the second they are far apart and it must exceed 0.95.
    NOTE(review): presumably ``predict_KNN`` returns an accuracy-like score;
    confirm against its implementation.
    """
    per_class = 1000
    labels = ['F'] * per_class + ['M'] * per_class
    knn_options = dict(tsize=25, n_neighbors=5, input_cols=2)

    # (class 'M' mean for column 0, class 'M' mean for column 1, lower bound)
    scenarios = (
        (2, 0.2, 0.6),
        (1000, 100, 0.95),
    )

    for m_mean0, m_mean1, lower_bound in scenarios:
        dataf = pd.DataFrame({
            'output': labels,
            0: np.append(np.random.normal(1, 1, size=per_class),
                         np.random.normal(m_mean0, 1, size=per_class)),
            1: np.append(np.random.normal(0.1, 0.1, size=per_class),
                         np.random.normal(m_mean1, 0.1, size=per_class)),
        })

        # Build once without equal sampling; the result is not used further,
        # the call is kept so that code path is still exercised.
        _model, _X, _Y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                             equal_sampling=False,
                                             **knn_options)

        scores = np.array([])
        for _ in range(30):
            knn, _train_x, _train_y = pca.build_KNN_model(
                dataf, 'output', ['F', 'M'],
                equal_sampling=True, **knn_options)

            # Last two columns are the inputs, the first column is the label.
            inputs = dataf.values[:, -2:]
            targets = dataf.values[:, 0]
            scores = np.append(scores, pca.predict_KNN(knn, inputs, targets))

        assert np.mean(scores) > lower_bound
137
138
139
def test_feature_violin():
    """Check the shape and mean of the frame returned by ``pca.feature_violin``.

    The call is given ``fname='test.png'``.
    """
    np.random.seed(seed=1)

    dataset = {'label': ['yes'] * 10 + ['no'] * 10}
    # Columns 0-2 are drawn in this order so the seeded values stay the same.
    for column, (mean, spread) in enumerate(((0.5, 1), (1, 2), (3, 10))):
        dataset[column] = np.random.normal(mean, spread, size=20)
    frame = pd.DataFrame(data=dataset)

    violin_data = pca.feature_violin(frame, fname='test.png')

    assert violin_data.values.shape == (60, 3)
    assert np.round(np.mean(violin_data['Feature Value']), 1) == 2.1
153
154
155 View Code Duplication
def test_feature_plot_2D():
    """Smoke-test ``pca.feature_plot_2D`` on a seeded three-feature frame.

    Only the call itself is exercised; the checks on its return value and on
    the output file are currently commented out.
    """
    np.random.seed(seed=1)

    half = 250
    dataset = {'label': ['yes'] * half + ['no'] * half}
    # Columns 0-2 are drawn in this order so the seeded values stay the same.
    for column, (mean, spread) in enumerate(((0.5, 1), (1, 2), (3, 10))):
        dataset[column] = np.random.normal(mean, spread, size=2 * half)
    df = pd.DataFrame(data=dataset)

    xy = pca.feature_plot_2D(df, label='label', features=[0, 1],
                             randsel=True, fname='test1.png')
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_2D(df, label='label', features=[0, 1], randsel=False)
    # assert len(xy[1]) == 250
172
173
174 View Code Duplication
def test_feature_plot_3D():
    """Smoke-test ``pca.feature_plot_3D`` on a seeded three-feature frame.

    Only the call itself is exercised; the checks on its return value and on
    the output file are currently commented out.
    """
    np.random.seed(seed=1)

    half = 250
    dataset = {'label': ['yes'] * half + ['no'] * half}
    # Columns 0-2 are drawn in this order so the seeded values stay the same.
    for column, (mean, spread) in enumerate(((0.5, 1), (1, 2), (3, 10))):
        dataset[column] = np.random.normal(mean, spread, size=2 * half)
    df = pd.DataFrame(data=dataset)

    xy = pca.feature_plot_3D(df, label='label', features=[0, 1, 2],
                             randsel=True, fname='test1.png')
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_3D(df, label='label', features=[0, 1, 2], randsel=False)
    # assert len(xy[1]) == 250
191