import numpy as np

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import (
    ExtraTreesClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.model_selection import cross_val_score

# NOTE(review): XGBClassifier is imported but never used below — confirm
# before removing (kept for backward compatibility).
from xgboost import XGBClassifier

from hyperactive import Hyperactive

# Shared dataset: all objective functions below close over X and y.
data = load_breast_cancer()
X, y = data.data, data.target
def model_etc(opt):
    """Objective for Hyperactive: mean 3-fold CV score of an ExtraTreesClassifier.

    `opt` is a dict-like of sampled hyperparameter values; the score is the
    mean cross-validated accuracy on the module-level X, y dataset.
    """
    params = {
        "n_estimators": opt["n_estimators"],
        "criterion": opt["criterion"],
        "max_features": opt["max_features"],
        "min_samples_split": opt["min_samples_split"],
        "min_samples_leaf": opt["min_samples_leaf"],
        "bootstrap": opt["bootstrap"],
    }
    estimator = ExtraTreesClassifier(**params)
    return cross_val_score(estimator, X, y, cv=3).mean()
def model_rfc(opt):
    """Objective for Hyperactive: mean 3-fold CV score of a RandomForestClassifier.

    `opt` is a dict-like of sampled hyperparameter values; the score is the
    mean cross-validated accuracy on the module-level X, y dataset.
    """
    params = {
        "n_estimators": opt["n_estimators"],
        "criterion": opt["criterion"],
        "max_features": opt["max_features"],
        "min_samples_split": opt["min_samples_split"],
        "min_samples_leaf": opt["min_samples_leaf"],
        "bootstrap": opt["bootstrap"],
    }
    estimator = RandomForestClassifier(**params)
    return cross_val_score(estimator, X, y, cv=3).mean()
def model_gbc(opt):
    """Objective for Hyperactive: mean 3-fold CV score of a GradientBoostingClassifier.

    `opt` is a dict-like of sampled hyperparameter values; the score is the
    mean cross-validated accuracy on the module-level X, y dataset.
    """
    params = {
        "n_estimators": opt["n_estimators"],
        "learning_rate": opt["learning_rate"],
        "max_depth": opt["max_depth"],
        "min_samples_split": opt["min_samples_split"],
        "min_samples_leaf": opt["min_samples_leaf"],
        "subsample": opt["subsample"],
        "max_features": opt["max_features"],
    }
    estimator = GradientBoostingClassifier(**params)
    return cross_val_score(estimator, X, y, cv=3).mean()
# Hyperparameter grid for the ExtraTreesClassifier objective (model_etc).
search_space_etc = dict(
    n_estimators=list(range(10, 200, 10)),
    criterion=["gini", "entropy"],
    max_features=list(np.arange(0.05, 1.01, 0.05)),
    min_samples_split=list(range(2, 21)),
    min_samples_leaf=list(range(1, 21)),
    bootstrap=[True, False],
)
# Hyperparameter grid for the RandomForestClassifier objective (model_rfc).
search_space_rfc = dict(
    n_estimators=list(range(10, 200, 10)),
    criterion=["gini", "entropy"],
    max_features=list(np.arange(0.05, 1.01, 0.05)),
    min_samples_split=list(range(2, 21)),
    min_samples_leaf=list(range(1, 21)),
    bootstrap=[True, False],
)
# Hyperparameter grid for the GradientBoostingClassifier objective (model_gbc).
search_space_gbc = dict(
    n_estimators=list(range(10, 200, 10)),
    learning_rate=[1e-3, 1e-2, 1e-1, 0.5, 1.0],
    max_depth=list(range(1, 11)),
    min_samples_split=list(range(2, 21)),
    min_samples_leaf=list(range(1, 21)),
    subsample=list(np.arange(0.05, 1.01, 0.05)),
    max_features=list(np.arange(0.05, 1.01, 0.05)),
)
# Register all three objective/search-space pairs on one Hyperactive instance
# (joblib distribution runs them in parallel), then optimize.
# NOTE(review): max_time=5 — presumably seconds; confirm the intended budget.
hyper = Hyperactive(distribution="joblib")
for objective, space in (
    (model_etc, search_space_etc),
    (model_rfc, search_space_rfc),
    (model_gbc, search_space_gbc),
):
    hyper.add_search(objective, space, n_iter=50)
hyper.run(max_time=5)