1
|
|
|
import os |
2
|
|
|
import pytest |
3
|
|
|
import numpy as np |
4
|
|
|
import numpy.testing as npt |
5
|
|
|
import pandas as pd |
6
|
|
|
import diff_classifier.msd as msd |
7
|
|
|
import diff_classifier.pca as pca |
8
|
|
|
import diff_classifier.features as ft |
9
|
|
|
|
10
|
|
|
# True when the process environment defines a ``CI`` variable (any value).
# Intended for ``pytest.mark.skipif(is_travis, ...)`` markers on tests that
# behave differently on Travis.  Membership is tested on the mapping itself;
# going through ``.keys()`` adds nothing.
is_travis = "CI" in os.environ
11
|
|
|
|
12
|
|
|
|
13
|
|
|
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
@pytest.mark.xfail
def test_partial_corr():
    """Compare the rounded sum of ``pca.partial_corr`` against reference values.

    Each case builds a random trajectory dataset, derives MSDs and features
    from it, and checks that the sum of the partial-correlation output,
    rounded to one decimal, equals the recorded value.  The test is marked
    ``xfail``.
    """
    # (random_traj_dataset kwargs, ``frames`` for all_msds2, expected sum)
    cases = (
        ({}, 100, 24.0),
        ({'nparts': 10}, 100, 47.9),
        ({'nparts': 10, 'seed': 9}, 100, 33.4),
        ({'nparts': 10, 'nframes': 40, 'seed': 9}, 40, 17.4),
        ({'nparts': 10, 'nframes': 40, 'ndist': (3, 5), 'seed': 9}, 40, 35.7),
    )

    for traj_kwargs, nframes, expected in cases:
        trajectories = msd.random_traj_dataset(**traj_kwargs)
        displacements = msd.all_msds2(trajectories, frames=nframes)
        features = ft.calculate_features(displacements, mean_values=False)
        total = np.round(np.sum(pca.partial_corr(features)), 1)
        # Argument order (reference first, computed second) is kept as-is.
        npt.assert_equal(expected, total)
45
|
|
|
|
46
|
|
|
|
47
|
|
|
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
@pytest.mark.xfail
def test_kmo():
    """Check the rounded sum of the feature correlation matrix.

    Features are computed for a seeded random trajectory dataset, the
    ``frames`` and ``Track_ID`` columns are dropped, and the correlation
    matrix of the remaining columns is summed and rounded to one decimal.
    Note that, despite the test's name, only ``np.corrcoef`` is exercised
    here.  The test is marked ``xfail``.
    """
    trajectories = msd.random_traj_dataset(nparts=10, ndist=(1, 1), seed=3)
    displacements = msd.all_msds2(trajectories, frames=100)
    features = ft.calculate_features(displacements, mean_values=False)

    numeric_only = features.drop(['frames', 'Track_ID'], axis=1)
    # Transpose so that each feature column is one variable (row) for corrcoef.
    correlations = np.corrcoef(numeric_only.T)

    rounded_total = np.round(np.sum(correlations), 1)
    npt.assert_equal(rounded_total, 7.3)
56
|
|
|
|
57
|
|
|
|
58
|
|
|
def test_pca_analysis():
    """Check the summed component values produced by ``pca.pca_analysis``.

    Features from a random trajectory dataset are passed to ``pca_analysis``
    with five components, dropping the ``frames`` and ``Track_ID`` columns;
    the sum of ``components.values`` rounded to three decimals must be 0.400.
    """
    trajectories = msd.random_traj_dataset(nparts=10, ndist=(2, 6))
    displacements = msd.all_msds2(trajectories, frames=100)
    features = ft.calculate_features(displacements, mean_values=False)

    excluded = ['frames', 'Track_ID']
    result = pca.pca_analysis(features, dropcols=excluded, n_components=5)

    component_total = np.sum(result.components.values)
    npt.assert_equal(np.round(component_total, 3), 0.400)
66
|
|
|
|
67
|
|
|
|
68
|
|
|
def test_plot_pca():
    """Placeholder test: it makes no assertions and calls no plotting code."""
    # Emits a single blank line; kept so the test keeps its slot in the suite.
    print()
70
|
|
|
|
71
|
|
|
|
72
|
|
|
def test_build_model():
    """Check the shapes of the arrays returned by ``pca.build_model``.

    A 2000-row frame with an ``output`` label column ('F' / 'M') and two
    normally distributed feature columns is passed to ``build_model`` with
    ``tsize=25`` and ``input_cols=2``.  The returned ``X`` and ``Y`` must have
    shapes ``(25, 2)`` and ``(25,)``.
    """
    per_class = 1000
    labels = ['F'] * per_class + ['M'] * per_class

    # Draws are made in the order: column 0 ('F', 'M'), then column 1 ('F', 'M').
    col0_f = np.random.normal(1, 1, size=per_class)
    col0_m = np.random.normal(2, 1, size=per_class)
    col1_f = np.random.normal(0.1, 0.1, size=per_class)
    col1_m = np.random.normal(0.2, 0.1, size=per_class)

    frame = pd.DataFrame({
        'output': labels,
        0: np.append(col0_f, col0_m),
        1: np.append(col1_f, col1_m),
    })

    # The fitted model itself is not inspected here.
    _, train_x, train_y = pca.build_model(
        frame, 'output', ['F', 'M'],
        equal_sampling=False, tsize=25, n_neighbors=5, input_cols=2,
    )

    assert train_x.shape == (25, 2)
    assert train_y.shape == (25,)
87
|
|
|
|
88
|
|
|
|
89
|
|
|
def test_predict_model():
    """Check ``pca.predict_model`` scores on two synthetic two-class datasets.

    For each dataset a model is rebuilt 30 times with ``equal_sampling=True``
    and scored on the full frame; the mean of the 30 scores must exceed a
    threshold: 0.6 when the class means are close together, 0.95 when they
    are far apart.  The random generator is not seeded in this test.
    """
    per_class = 1000
    output = ['F'] * per_class + ['M'] * per_class

    def make_frame(col0_means, col1_means):
        # Draw order: column 0 ('F', 'M'), then column 1 ('F', 'M').
        col0 = np.append(np.random.normal(col0_means[0], 1, size=per_class),
                         np.random.normal(col0_means[1], 1, size=per_class))
        col1 = np.append(np.random.normal(col1_means[0], 0.1, size=per_class),
                         np.random.normal(col1_means[1], 0.1, size=per_class))
        return pd.DataFrame({'output': output, 0: col0, 1: col1})

    def fit(frame, equal_sampling):
        return pca.build_model(frame, 'output', ['F', 'M'],
                               equal_sampling=equal_sampling, tsize=25,
                               n_neighbors=5, input_cols=2)

    def mean_score(frame, repeats=30):
        # One fit without equal sampling is made up front; its result is
        # not used, but the call is kept so the execution sequence matches.
        fit(frame, False)

        scores = []
        for _ in range(repeats):
            knn, _train_x, _train_y = fit(frame, True)
            # Columns are ordered ('output', 0, 1): the last two are the
            # features and the first is the label.
            features = frame.values[:, -2:]
            truth = frame.values[:, 0]
            scores.append(pca.predict_model(knn, features, truth))
        return np.mean(scores)

    # test 1: class means close together
    assert mean_score(make_frame((1, 2), (0.1, 0.2))) > 0.6

    # test 2: class means far apart
    assert mean_score(make_frame((1, 1000), (0.1, 100))) > 0.95
137
|
|
|
|
138
|
|
|
|
139
|
|
|
def test_feature_violin():
    """Check the frame returned by ``pca.feature_violin``.

    A seeded 20-row frame with a ``label`` column and three numeric columns
    is passed in (with ``fname='test.png'``).  The result must have shape
    ``(60, 3)`` and a mean ``'Feature Value'`` of 2.1 after rounding to one
    decimal.
    """
    np.random.seed(seed=1)
    rows = 20
    frame = pd.DataFrame(data={
        'label': 10*['yes'] + 10*['no'],
        0: np.random.normal(0.5, 1, size=rows),
        1: np.random.normal(1, 2, size=rows),
        2: np.random.normal(3, 10, size=rows),
    })

    melted = pca.feature_violin(frame, fname='test.png')

    assert melted.values.shape == (60, 3)
    mean_value = np.mean(melted['Feature Value'])
    assert np.round(mean_value, 1) == 2.1
153
|
|
|
|
154
|
|
|
|
155
|
|
View Code Duplication |
def test_feature_plot_2D():
    """Smoke-test ``pca.feature_plot_2D`` on a seeded 500-row frame.

    The call only has to complete without raising; the checks on its return
    value are currently disabled (see the commented block below).
    """
    np.random.seed(seed=1)
    rows = 500
    frame = pd.DataFrame(data={
        'label': 250*['yes'] + 250*['no'],
        0: np.random.normal(0.5, 1, size=rows),
        1: np.random.normal(1, 2, size=rows),
        2: np.random.normal(3, 10, size=rows),
    })

    pca.feature_plot_2D(frame, label='label', features=[0, 1], randsel=True,
                        lvals=['yes', 'no'], fname='test1.png')

    # Disabled checks on the returned value ``xy``:
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_2D(df, label='label', features=[0, 1], randsel=False)
    # assert len(xy[1]) == 250
172
|
|
|
|
173
|
|
|
|
174
|
|
View Code Duplication |
def test_feature_plot_3D():
    """Smoke-test ``pca.feature_plot_3D`` on a seeded 500-row frame.

    The call only has to complete without raising; the checks on its return
    value are currently disabled (see the commented block below).
    """
    np.random.seed(seed=1)
    rows = 500
    frame = pd.DataFrame(data={
        'label': 250*['yes'] + 250*['no'],
        0: np.random.normal(0.5, 1, size=rows),
        1: np.random.normal(1, 2, size=rows),
        2: np.random.normal(3, 10, size=rows),
    })

    pca.feature_plot_3D(frame, label='label', features=[0, 1, 2],
                        lvals=['yes', 'no'], randsel=True,
                        fname='test1.png')

    # Disabled checks on the returned value ``xy``:
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_3D(df, label='label', features=[0, 1, 2], randsel=False)
    # assert len(xy[1]) == 250
192
|
|
|
|