import os
import pytest
import numpy as np
import numpy.testing as npt
import pandas as pd
import diff_classifier.msd as msd
import diff_classifier.pca as pca
import diff_classifier.features as ft

is_travis = "CI" in os.environ.keys()

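# The tests below run diff_classifier's MSD/feature pipeline on randomly
# generated trajectory datasets, so the hard-coded expected sums are tied to
# the generated data; the xfail marks and the commented-out skipif (Travis
# note) suggest the values are not reproduced in every environment.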
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
@pytest.mark.xfail
def test_partial_corr():
    dataf = msd.random_traj_dataset()
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(24.0, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(47.9, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, seed=9)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(33.4, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, nframes=40, seed=9)
    msds = msd.all_msds2(dataf, frames=40)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(17.4, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, nframes=40, ndist=(3, 5), seed=9)
    msds = msd.all_msds2(dataf, frames=40)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(35.7, np.round(np.sum(pcorr), 1))

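# test_kmo builds features for a single-diffusivity dataset (ndist=(1, 1)) and
# checks the summed feature correlation matrix; note that it only computes
# np.corrcoef and does not call a dedicated KMO routine.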
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
@pytest.mark.xfail
def test_kmo():
    dataf = msd.random_traj_dataset(nparts=10, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    dataset = feat.drop(['frames', 'Track_ID'], axis=1)
    corrmatrix = np.corrcoef(dataset.transpose())
    npt.assert_equal(np.round(np.sum(corrmatrix), 1), 7.3)

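# test_pca_analysis runs pca.pca_analysis on the computed trajectory features
# (dropping 'frames' and 'Track_ID') and checks the rounded sum of the
# resulting components table.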
def test_pca_analysis():
    dataf = msd.random_traj_dataset(nparts=10, ndist=(2, 6))
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcadataset = pca.pca_analysis(feat, dropcols=['frames', 'Track_ID'],
                                  n_components=5)

    npt.assert_equal(np.round(np.sum(pcadataset.components.values), 3), 0.400)

def test_plot_pca():
    # Placeholder test: the plotting output is not checked automatically.
    print()

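# test_build_KNN_model constructs a two-class ('F'/'M') dataset from
# overlapping Gaussian features and checks that pca.build_KNN_model returns a
# training set of the requested size (tsize=25) with two input columns.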
def test_build_KNN_model():
    output = ['F']*1000 + ['M']*1000
    data = {'output': output,
            0: np.append(np.random.normal(1, 1, size=1000),
                         np.random.normal(2, 1, size=1000)),
            1: np.append(np.random.normal(0.1, 0.1, size=1000),
                         np.random.normal(0.2, 0.1, size=1000))}
    dataf = pd.DataFrame(data)

    model, X, Y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                      equal_sampling=False, tsize=25,
                                      n_neighbors=5, input_cols=2)

    assert X.shape == (25, 2)
    assert Y.shape == (25,)

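# test_predict_KNN repeatedly builds equally-sampled KNN models and scores
# them on the full dataset via pca.predict_KNN. With overlapping feature
# distributions the mean score should exceed 0.6; with widely separated
# distributions (second block) it should exceed 0.95.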
def test_predict_KNN():
    output = ['F']*1000 + ['M']*1000
    data = {'output': output,
            0: np.append(np.random.normal(1, 1, size=1000),
                         np.random.normal(2, 1, size=1000)),
            1: np.append(np.random.normal(0.1, 0.1, size=1000),
                         np.random.normal(0.2, 0.1, size=1000))}
    dataf = pd.DataFrame(data)

    model, X, Y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                      equal_sampling=False, tsize=25,
                                      n_neighbors=5, input_cols=2)

    testp = np.array([])
    for i in range(0, 30):
        KNNmod, X, y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                           equal_sampling=True, tsize=25,
                                           n_neighbors=5, input_cols=2)

        X2 = dataf.values[:, -2:]
        y2 = dataf.values[:, 0]
        testp = np.append(testp, pca.predict_KNN(KNNmod, X2, y2))

    assert np.mean(testp) > 0.6

    # test 2
    data = {'output': output,
            0: np.append(np.random.normal(1, 1, size=1000),
                         np.random.normal(1000, 1, size=1000)),
            1: np.append(np.random.normal(0.1, 0.1, size=1000),
                         np.random.normal(100, 0.1, size=1000))}
    dataf = pd.DataFrame(data)

    model, X, Y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                      equal_sampling=False, tsize=25,
                                      n_neighbors=5, input_cols=2)

    testp = np.array([])
    for i in range(0, 30):
        KNNmod, X, y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                           equal_sampling=True, tsize=25,
                                           n_neighbors=5, input_cols=2)

        X2 = dataf.values[:, -2:]
        y2 = dataf.values[:, 0]
        testp = np.append(testp, pca.predict_KNN(KNNmod, X2, y2))

    assert np.mean(testp) > 0.95

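# test_feature_violin passes a small labelled feature table to
# pca.feature_violin and checks the shape of the returned table
# (20 samples x 3 features -> 60 rows) and the rounded mean of its
# 'Feature Value' column.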
def test_feature_violin():

    np.random.seed(seed=1)
    dataset = {'label': 10*['yes'] + 10*['no'],
               0: np.random.normal(0.5, 1, size=20),
               1: np.random.normal(1, 2, size=20),
               2: np.random.normal(3, 10, size=20)
               }
    df = pd.DataFrame(data=dataset)

    to_violin = pca.feature_violin(df, fname='test.png')

    assert to_violin.values.shape == (60, 3)
    assert np.round(np.mean(to_violin['Feature Value']), 1) == 2.1

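# test_feature_plot_2D generates a labelled three-feature dataset and calls
# pca.feature_plot_2D on two of the features with fname='test1.png'; the
# assertions on the returned selection are currently commented out.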
def test_feature_plot_2D():

    np.random.seed(seed=1)
    dataset = {'label': 250*['yes'] + 250*['no'],
               0: np.random.normal(0.5, 1, size=500),
               1: np.random.normal(1, 2, size=500),
               2: np.random.normal(3, 10, size=500)
               }
    df = pd.DataFrame(data=dataset)

    xy = pca.feature_plot_2D(df, label='label', features=[0, 1], randsel=True,
                             fname='test1.png')
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_2D(df, label='label', features=[0, 1], randsel=False)
    # assert len(xy[1]) == 250

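# test_feature_plot_3D repeats the same setup with all three features using
# pca.feature_plot_3D.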
def test_feature_plot_3D():

    np.random.seed(seed=1)
    dataset = {'label': 250*['yes'] + 250*['no'],
               0: np.random.normal(0.5, 1, size=500),
               1: np.random.normal(1, 2, size=500),
               2: np.random.normal(3, 10, size=500)
               }
    df = pd.DataFrame(data=dataset)

    xy = pca.feature_plot_3D(df, label='label', features=[0, 1, 2], randsel=True,
                             fname='test1.png')
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_3D(df, label='label', features=[0, 1, 2], randsel=False)
    # assert len(xy[1]) == 250