import os
import pytest
import numpy as np
import numpy.testing as npt
import pandas as pd
import diff_classifier.msd as msd
import diff_classifier.pca as pca
import diff_classifier.features as ft

is_travis = "CI" in os.environ.keys()

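# The tests below run diff_classifier's MSD/feature pipeline on randomly
# generated trajectory datasets, so the hard-coded expected sums are tied to
# the generated data; the xfail marks and the commented-out skipif (Travis
# note) suggest the values are not reproduced in every environment.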
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
@pytest.mark.xfail
def test_partial_corr():
    dataf = msd.random_traj_dataset()
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(24.0, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(47.9, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, seed=9)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(33.4, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, nframes=40, seed=9)
    msds = msd.all_msds2(dataf, frames=40)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(17.4, np.round(np.sum(pcorr), 1))

    dataf = msd.random_traj_dataset(nparts=10, nframes=40, ndist=(3, 5), seed=9)
    msds = msd.all_msds2(dataf, frames=40)
    feat = ft.calculate_features(msds)
    pcorr = pca.partial_corr(feat)
    npt.assert_equal(35.7, np.round(np.sum(pcorr), 1))

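# test_kmo builds features for a single-diffusivity dataset (ndist=(1, 1)) and
# checks the summed feature correlation matrix; note that it only computes
# np.corrcoef and does not call a dedicated KMO routine.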
# @pytest.mark.skipif(is_travis, reason="Function behaves differently on Travis.")
@pytest.mark.xfail
def test_kmo():
    dataf = msd.random_traj_dataset(nparts=10, ndist=(1, 1), seed=3)
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    dataset = feat.drop(['frames', 'Track_ID'], axis=1)
    corrmatrix = np.corrcoef(dataset.transpose())
    npt.assert_equal(np.round(np.sum(corrmatrix), 1), 7.3)

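# test_pca_analysis runs pca.pca_analysis on the computed trajectory features
# (dropping 'frames' and 'Track_ID') and checks the rounded sum of the
# resulting components table.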
def test_pca_analysis():
    dataf = msd.random_traj_dataset(nparts=10, ndist=(2, 6))
    msds = msd.all_msds2(dataf, frames=100)
    feat = ft.calculate_features(msds)
    pcadataset = pca.pca_analysis(feat, dropcols=['frames', 'Track_ID'],
                                  n_components=5)

    npt.assert_equal(np.round(np.sum(pcadataset.components.values), 3), 0.400)

def test_plot_pca():
    # Placeholder test: the plotting output is not checked automatically.
    print()

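# test_build_KNN_model constructs a two-class ('F'/'M') dataset from
# overlapping Gaussian features and checks that pca.build_KNN_model returns a
# training set of the requested size (tsize=25) with two input columns.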
def test_build_KNN_model():
    output = ['F']*1000 + ['M']*1000
    data = {'output': output,
            0: np.append(np.random.normal(1, 1, size=1000),
                         np.random.normal(2, 1, size=1000)),
            1: np.append(np.random.normal(0.1, 0.1, size=1000),
                         np.random.normal(0.2, 0.1, size=1000))}
    dataf = pd.DataFrame(data)

    model, X, Y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                      equal_sampling=False, tsize=25,
                                      n_neighbors=5, input_cols=2)

    assert X.shape == (25, 2)
    assert Y.shape == (25,)

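# test_predict_KNN repeatedly builds equally-sampled KNN models and scores
# them on the full dataset via pca.predict_KNN. With overlapping feature
# distributions the mean score should exceed 0.6; with widely separated
# distributions (second block) it should exceed 0.95.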
def test_predict_KNN():
    output = ['F']*1000 + ['M']*1000
    data = {'output': output,
            0: np.append(np.random.normal(1, 1, size=1000),
                         np.random.normal(2, 1, size=1000)),
            1: np.append(np.random.normal(0.1, 0.1, size=1000),
                         np.random.normal(0.2, 0.1, size=1000))}
    dataf = pd.DataFrame(data)

    model, X, Y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                      equal_sampling=False, tsize=25,
                                      n_neighbors=5, input_cols=2)

    testp = np.array([])
    for i in range(0, 30):
        KNNmod, X, y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                           equal_sampling=True, tsize=25,
                                           n_neighbors=5, input_cols=2)

        X2 = dataf.values[:, -2:]
        y2 = dataf.values[:, 0]
        testp = np.append(testp, pca.predict_KNN(KNNmod, X2, y2))

    assert np.mean(testp) > 0.6

    # test 2
    data = {'output': output,
            0: np.append(np.random.normal(1, 1, size=1000),
                         np.random.normal(1000, 1, size=1000)),
            1: np.append(np.random.normal(0.1, 0.1, size=1000),
                         np.random.normal(100, 0.1, size=1000))}
    dataf = pd.DataFrame(data)

    model, X, Y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                      equal_sampling=False, tsize=25,
                                      n_neighbors=5, input_cols=2)

    testp = np.array([])
    for i in range(0, 30):
        KNNmod, X, y = pca.build_KNN_model(dataf, 'output', ['F', 'M'],
                                           equal_sampling=True, tsize=25,
                                           n_neighbors=5, input_cols=2)

        X2 = dataf.values[:, -2:]
        y2 = dataf.values[:, 0]
        testp = np.append(testp, pca.predict_KNN(KNNmod, X2, y2))

    assert np.mean(testp) > 0.95

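# test_feature_violin passes a small labelled feature table to
# pca.feature_violin and checks the shape of the returned table
# (20 samples x 3 features -> 60 rows) and the rounded mean of its
# 'Feature Value' column.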
def test_feature_violin():

    np.random.seed(seed=1)
    dataset = {'label': 10*['yes'] + 10*['no'],
               0: np.random.normal(0.5, 1, size=20),
               1: np.random.normal(1, 2, size=20),
               2: np.random.normal(3, 10, size=20)
               }
    df = pd.DataFrame(data=dataset)

    to_violin = pca.feature_violin(df, fname='test.png')

    assert to_violin.values.shape == (60, 3)
    assert np.round(np.mean(to_violin['Feature Value']), 1) == 2.1

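# test_feature_plot_2D generates a labelled three-feature dataset and calls
# pca.feature_plot_2D on two of the features with fname='test1.png'; the
# assertions on the returned selection are currently commented out.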
def test_feature_plot_2D():

    np.random.seed(seed=1)
    dataset = {'label': 250*['yes'] + 250*['no'],
               0: np.random.normal(0.5, 1, size=500),
               1: np.random.normal(1, 2, size=500),
               2: np.random.normal(3, 10, size=500)
               }
    df = pd.DataFrame(data=dataset)

    xy = pca.feature_plot_2D(df, label='label', features=[0, 1], randsel=True,
                             fname='test1.png')
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_2D(df, label='label', features=[0, 1], randsel=False)
    # assert len(xy[1]) == 250

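# test_feature_plot_3D repeats the same setup with all three features using
# pca.feature_plot_3D.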
def test_feature_plot_3D():

    np.random.seed(seed=1)
    dataset = {'label': 250*['yes'] + 250*['no'],
               0: np.random.normal(0.5, 1, size=500),
               1: np.random.normal(1, 2, size=500),
               2: np.random.normal(3, 10, size=500)
               }
    df = pd.DataFrame(data=dataset)

    xy = pca.feature_plot_3D(df, label='label', features=[0, 1, 2], randsel=True,
                             fname='test1.png')
    # assert len(xy[1]) == 200
    # assert os.path.isfile('test1.png')
    #
    # xy = pca.feature_plot_3D(df, label='label', features=[0, 1, 2], randsel=False)
    # assert len(xy[1]) == 250