Passed
Push — master ( 36b6ae...3d094b )
by Simon
01:24
created

ensemble_learning_example   A

Complexity

Total Complexity 8

Size/Duplication

Total Lines 111
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 62
dl 0
loc 111
rs 10
c 0
b 0
f 0
wmc 8

2 Functions

Rating   Name   Duplication   Size   Complexity  
A stacking() 0 10 1
B get_combinations() 0 16 7
1
"""
2
This example shows how you can search for the best models in each layer in a 
3
stacking ensemble. 
4
5
We want to create a stacking ensemble with 3 layers:
6
    - a top layer with one model
7
    - a middle layer with multiple models
8
    - a bottom layer with multiple models
9
10
We also want to know how many models should be used in the middle and bottom layer.
11
For that we can use the helper function "get_combinations". It works as follows:
12
13
input = [1, 2, 3]
14
output = get_combinations(input, comb_len=2)
15
output: [[1, 2], [1, 3], [2, 3], [1, 2, 3]]
16
17
Instead of numbers we insert models into "input". This way we get each combination
18
with more than 2 elements. Only 1 model per layer would not make much sense.
19
20
The ensemble itself is created via the package "mlxtend" in the objective-function "stacking".
21
"""
22
23
import itertools
24
25
from sklearn.datasets import load_breast_cancer
26
from sklearn.model_selection import cross_val_score
27
from mlxtend.classifier import StackingClassifier
28
29
from sklearn.ensemble import (
30
    GradientBoostingClassifier,
31
    RandomForestClassifier,
32
    ExtraTreesClassifier,
33
)
34
35
from sklearn.neighbors import KNeighborsClassifier
36
from sklearn.neural_network import MLPClassifier
37
from sklearn.gaussian_process import GaussianProcessClassifier
38
from sklearn.tree import DecisionTreeClassifier
39
from sklearn.naive_bayes import GaussianNB
40
41
from sklearn.linear_model import LogisticRegression
42
from sklearn.linear_model import RidgeClassifier
43
44
from hyperactive import Hyperactive
45
46
# Dataset every candidate ensemble is scored against.
data = load_breast_cancer()
X, y = data.data, data.target

# Instantiate the pool of base models referenced by the search space below.
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()

mlp = MLPClassifier()
gnb = GaussianNB()
gpc = GaussianProcessClassifier()
dtc = DecisionTreeClassifier()
knn = KNeighborsClassifier()

lr = LogisticRegression()
rc = RidgeClassifier()
62
63
64
def stacking(opt):
    """Objective function: build a two-level stacking ensemble and score it.

    ``opt`` supplies the sampled search-space values: ``"lvl_0"`` and
    ``"lvl_1"`` are lists of base classifiers, ``"top"`` is the
    meta-classifier of the innermost stack.

    Returns the mean 3-fold cross-validation score on the module-level
    breast-cancer data (X, y).
    """
    inner_stack = StackingClassifier(
        classifiers=opt["lvl_0"], meta_classifier=opt["top"]
    )
    outer_stack = StackingClassifier(
        classifiers=opt["lvl_1"], meta_classifier=inner_stack
    )
    return cross_val_score(outer_stack, X, y, cv=3).mean()
74
75
76
# helper function to create search space dimensions
77
# helper function to create search space dimensions
def get_combinations(models, comb_len=2):
    """Return all combinations of *models* with at least ``comb_len`` elements.

    Example:
        get_combinations([1, 2, 3], comb_len=2)
        -> [[1, 2], [1, 3], [2, 3], [1, 2, 3]]

    Parameters
    ----------
    models : sequence of hashable objects
        Candidate elements (here: classifier instances).
    comb_len : int, optional
        Minimum size of each returned combination (default 2).

    Returns
    -------
    list of list
        Combinations ordered by increasing size, each preserving the
        input's element order — the same output (and order) the previous
        permutations-based implementation produced.
    """
    comb_list = []
    # Guards against duplicate entries in *models*; with distinct elements
    # itertools.combinations already yields each subset exactly once, so
    # this replaces the original O(n!) permutations + set-dedup scan.
    seen = set()
    for size in range(comb_len, len(models) + 1):
        for subset in itertools.combinations(models, size):
            key = frozenset(subset)
            if key in seen:
                continue  # only reachable when *models* holds duplicates
            seen.add(key)
            comb_list.append(list(subset))
    return comb_list
93
94
95
# Candidate pools for each layer of the stacking ensemble.
top_candidates = [lr, dtc, gnb, rc]
bottom_pool = [gpc, dtc, mlp, gnb, knn]
middle_pool = [gbc, rfc, etc]

# Every combination with >= 2 models becomes one discrete choice per layer.
lvl_0_choices = get_combinations(bottom_pool)
lvl_1_choices = get_combinations(middle_pool)

search_space = {
    "lvl_1": lvl_1_choices,
    "lvl_0": lvl_0_choices,
    "top": top_candidates,
}

hyper = Hyperactive()
hyper.add_search(stacking, search_space, n_iter=20)
hyper.run()
111