ensemble_learning_example   A

Complexity
    Total Complexity    13

Size/Duplication
    Total Lines         138
    Duplicated Lines    0 %

Importance
    Changes             0

Metric    Value
eloc      74
dl        0
loc       138
rs        10
c         0
b         0
f         0
wmc       13

6 Functions

Rating    Name                 Duplication    Size    Complexity
A         stacking()           0              10      1
A         gnb_f()              0              2       1
B         get_combinations()   0              26      8
A         lr_f()               0              2       1
A         dtc_f()              0              2       1
A         rc_f()               0              2       1
"""
This example shows how you can search for the best models in each layer of a
stacking ensemble.

We want to create a stacking ensemble with 3 layers:
    - a top layer with one model
    - a middle layer with multiple models
    - a bottom layer with multiple models

We also want to know how many models should be used in the middle and bottom layers.
For that we can use the helper function "get_combinations". It works as follows:

input = [1, 2, 3]
output = get_combinations(input, comb_len=2)
output: [[1, 2], [1, 3], [2, 3], [1, 2, 3]]

Instead of numbers we insert models into "input". This way we get every combination
with at least 2 elements. Only 1 model per layer would not make much sense.

The ensemble itself is created via the package "mlxtend" in the objective function "stacking".
"""

import itertools

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from mlxtend.classifier import StackingClassifier

from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
    ExtraTreesClassifier,
)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier

from hyperactive import Hyperactive

data = load_breast_cancer()
X, y = data.data, data.target

# define models that are used in search space
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()

mlp = MLPClassifier()
gnb = GaussianNB()
gpc = GaussianProcessClassifier()
dtc = DecisionTreeClassifier()
knn = KNeighborsClassifier()

lr = LogisticRegression()
rc = RidgeClassifier()


def stacking(opt):
    # each search-space value is a function that returns the model(s) for its layer
    lvl_1_ = opt["lvl_1"]()
    lvl_0_ = opt["lvl_0"]()
    top_ = opt["top"]()

    # the bottom layer (lvl_1) feeds a middle-layer stack (lvl_0),
    # which in turn feeds the single top-layer model
    stack_lvl_0 = StackingClassifier(classifiers=lvl_0_, meta_classifier=top_)
    stack_lvl_1 = StackingClassifier(classifiers=lvl_1_, meta_classifier=stack_lvl_0)
    scores = cross_val_score(stack_lvl_1, X, y, cv=3)

    return scores.mean()


# helper function to create search space dimensions
def get_combinations(models, comb_len=2):
    def _list_in_list_of_lists(list_, list_of_lists):
        for list__ in list_of_lists:
            if set(list_) == set(list__):
                return True

    comb_list = []
    for i in range(0, len(models) + 1):
        for subset in itertools.permutations(models, i):
            if len(subset) < comb_len:
                continue
            if _list_in_list_of_lists(subset, comb_list):
                continue

            comb_list.append(list(subset))

    comb_list_f = []
    for idx, comb_ in enumerate(comb_list):
        # bind the current combination via a default argument, so that every
        # generated function returns its own combination (a plain closure would
        # late-bind "comb_" and make all functions return the last combination)
        def _func_(comb_=comb_):
            return comb_

        # the enumeration index is always defined and unique per combination,
        # which keeps the generated function names distinct
        _func_.__name__ = str(idx) + "___" + str(comb_)
        comb_list_f.append(_func_)

    return comb_list_f

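
# Illustrative quick check of the helper (an added sketch, names like "_toy_combs"
# are only for this check): with the 3-element toy input from the module docstring
# there is one callable per subset of size >= 2, i.e. 2**3 - 3 - 1 = 4 callables.
_toy_combs = get_combinations([1, 2, 3], comb_len=2)
assert len(_toy_combs) == 4
assert sorted(len(f()) for f in _toy_combs) == [2, 2, 2, 3]
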

# wrap each candidate top-layer model in a function, matching the
# callable format produced by get_combinations
def lr_f():
    return lr


def dtc_f():
    return dtc


def gnb_f():
    return gnb


def rc_f():
    return rc


models_0 = [gpc, dtc, mlp, gnb, knn]
models_1 = [gbc, rfc, etc]

stack_lvl_0_clfs = get_combinations(models_0)
stack_lvl_1_clfs = get_combinations(models_1)


print("\n stack_lvl_0_clfs \n", stack_lvl_0_clfs, "\n")


search_space = {
    "lvl_1": stack_lvl_1_clfs,
    "lvl_0": stack_lvl_0_clfs,
    "top": [lr_f, dtc_f, gnb_f, rc_f],
}

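
# Illustrative, hand-written usage sketch ("example_opt" is not part of the example):
# a single point from the search space can be evaluated directly by calling the
# objective function with a plain dict, here the first candidate of every dimension.
# Kept commented out because fitting the nested stack with 3-fold CV is slow.
# example_opt = {key: candidates[0] for key, candidates in search_space.items()}
# print("single evaluation:", stacking(example_opt))
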
"""
138
hyper = Hyperactive()
139
hyper.add_search(stacking, search_space, n_iter=3)
140
hyper.run()
141
"""
142