Sklearn Preprocessing - Code Metrics - Inspection of "Merge branch 'master' into dev" - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( ef01f2...fd3757 )

by Simon

created 2019-12-09 16:35 UTC

Sklearn Preprocessing A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	54
Duplicated Lines	27.78 %

Importance

Changes

Metric	Value
wmc	3
eloc	36
dl	15
loc	54
rs	10
c	0
b	0
f	0

3 Functions

Rating	Name	Duplication	Size	Complexity
A	pca()	0	4	1
A	model()	15	15	1
A	none()	0	2	1

How to fix Duplicated Code

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.ensemble import GradientBoostingClassifier
from hyperactive import Hyperactive

# Load scikit-learn's bundled breast-cancer dataset once at module level;
# the feature matrix and the labels are pulled out as separate names.
data = load_breast_cancer()
X = data.data
y = data.target


def pca(X):
    """Return X projected onto 10 principal components.

    A fresh PCA is fitted on the given X at every call, so the projection
    always reflects exactly the data that was passed in.
    """
    reducer = PCA(n_components=10)
    return reducer.fit_transform(X)


def none(X):
    """Return X unchanged.

    Identity counterpart to ``pca`` so that "no preprocessing" can be
    offered as a callable with the same one-argument signature.
    """
    return X


def model(para, X, y):
    """Score one hyperparameter candidate with 3-fold cross-validation.

    Parameters
    ----------
    para : dict
        Candidate configuration. The keys read here are "n_estimators",
        "max_depth", "min_samples_split" and "min_samples_leaf" (passed to
        GradientBoostingClassifier), "decomposition" (a callable applied
        to X) and "k" (number of features kept by SelectKBest).
    X : array-like
        Feature matrix; must be stackable with np.hstack.
    y : array-like
        Target labels.

    Returns
    -------
    float
        Mean of the three cross-validation scores.
    """
    # Imported locally so this function is self-contained and the file's
    # top-level import block does not need to change.
    from sklearn.pipeline import make_pipeline

    # Named `classifier` rather than `model` to avoid shadowing this
    # function's own name inside its body.
    classifier = GradientBoostingClassifier(
        n_estimators=para["n_estimators"],
        max_depth=para["max_depth"],
        min_samples_split=para["min_samples_split"],
        min_samples_leaf=para["min_samples_leaf"],
    )

    # Append the output of the chosen preprocessing callable to the raw
    # features (with the identity callable this appends a second copy of X).
    extra_features = para["decomposition"](X)
    features = np.hstack((X, extra_features))

    # Feature selection lives inside the pipeline so SelectKBest is re-fitted
    # on the training part of every fold. Fitting it on the full data before
    # cross-validation would let validation-fold labels influence which
    # features are kept and bias the score upwards.
    estimator = make_pipeline(
        SelectKBest(f_classif, k=para["k"]),
        classifier,
    )
    scores = cross_val_score(estimator, features, y, cv=3)

    return scores.mean()


# Search space: the objective function `model` is the key, and its value maps
# each entry read from `para` inside `model` to the candidates to try.
search_config = {
    model: {
        # Preprocessing callables; each takes X and returns an array.
        "decomposition": [pca, none],
        # Number of features kept by SelectKBest.
        "k": range(2, 30),
        # GradientBoostingClassifier hyperparameters.
        "n_estimators": range(10, 200, 10),
        "max_depth": range(2, 12),
        "min_samples_split": range(2, 12),
        "min_samples_leaf": range(1, 11),
    }
}


# Run the search at import time on the data loaded above.
# NOTE(review): n_iter is presumably the number of candidates evaluated;
# confirm against the installed Hyperactive version.
opt = Hyperactive(X, y)
opt.search(search_config, n_iter=100)


1		import numpy as np
2		from sklearn.datasets import load_breast_cancer
3		from sklearn.model_selection import cross_val_score
4		from sklearn.decomposition import PCA
5		from sklearn.feature_selection import SelectKBest, f_classif
6		from sklearn.ensemble import GradientBoostingClassifier
7		from hyperactive import Hyperactive
8
9		data = load_breast_cancer()
10		X, y = data.data, data.target
11
12
13		def pca(X):
14		X = PCA(n_components=10).fit_transform(X)
15
16		return X
17
18
19		def none(X):
20		return X
21
22
23	View Code Duplication	def model(para, X, y):
		0 ignored issues – show Duplication introduced 2019-12-09 16:37 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
24		model = GradientBoostingClassifier(
25		n_estimators=para["n_estimators"],
26		max_depth=para["max_depth"],
27		min_samples_split=para["min_samples_split"],
28		min_samples_leaf=para["min_samples_leaf"],
29		)
30
31		X_pca = para["decomposition"](X)
32		X = np.hstack((X, X_pca))
33
34		X = SelectKBest(f_classif, k=para["k"]).fit_transform(X, y)
35		scores = cross_val_score(model, X, y, cv=3)
36
37		return scores.mean()
38
39
40		search_config = {
41		model: {
42		"decomposition": [pca, none],
43		"k": range(2, 30),
44		"n_estimators": range(10, 200, 10),
45		"max_depth": range(2, 12),
46		"min_samples_split": range(2, 12),
47		"min_samples_leaf": range(1, 11),
48		}
49		}
50
51
52		opt = Hyperactive(X, y)
53		opt.search(search_config, n_iter=100)
54

SimonBlanke / Hyperactive

Push — master ( ef01f2...fd3757 )

Sklearn Preprocessing A

Complexity

Size/Duplication

Importance

3 Functions

How to fix Duplicated Code

Duplicated Code

Duplication Side-by-Side

Filter issues like