Passed
Push — master ( 36b6ae...3d094b )
by Simon
01:24
created

ensemble_learning_example   A

Complexity

Total Complexity 8

Size/Duplication

Total Lines 111
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 62
dl 0
loc 111
rs 10
c 0
b 0
f 0
wmc 8

2 Functions

Rating   Name   Duplication   Size   Complexity  
A stacking() 0 10 1
B get_combinations() 0 16 7
1
"""
2
This example shows how you can search for the best models in each layer in a 
3
stacking ensemble. 
4
5
We want to create a stacking ensemble with 3 layers:
6
    - a top layer with one model
7
    - a middle layer with multiple models
8
    - a bottom layer with multiple models
9
10
We also want to know how many models should be used in the middle and bottom layer.
11
For that we can use the helper function "get_combinations". It works as follows:
12
13
input = [1, 2, 3]
14
output = get_combinations(input, comb_len=2)
15
output: [[1, 2], [1, 3], [2, 3], [1, 2, 3]]
16
17
Instead of numbers we insert models into "input". This way we get each combination
18
with more than 2 elements. Only 1 model per layer would not make much sense.
19
20
The ensemble itself is created via the package "mlxtend" in the objective-function "stacking".
21
"""
22
23
import itertools
24
25
from sklearn.datasets import load_breast_cancer
26
from sklearn.model_selection import cross_val_score
27
from mlxtend.classifier import StackingClassifier
28
29
from sklearn.ensemble import (
30
    GradientBoostingClassifier,
31
    RandomForestClassifier,
32
    ExtraTreesClassifier,
33
)
34
35
from sklearn.neighbors import KNeighborsClassifier
36
from sklearn.neural_network import MLPClassifier
37
from sklearn.gaussian_process import GaussianProcessClassifier
38
from sklearn.tree import DecisionTreeClassifier
39
from sklearn.naive_bayes import GaussianNB
40
41
from sklearn.linear_model import LogisticRegression
42
from sklearn.linear_model import RidgeClassifier
43
44
from hyperactive import Hyperactive
45
46
# Dataset every candidate ensemble is scored against.
data = load_breast_cancer()
X, y = data.data, data.target

# Instantiate the pool of base models referenced by the search space below.
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()

mlp = MLPClassifier()
gnb = GaussianNB()
gpc = GaussianProcessClassifier()
dtc = DecisionTreeClassifier()
knn = KNeighborsClassifier()

lr = LogisticRegression()
rc = RidgeClassifier()
62
63
64
def stacking(opt):
    """Objective function: build a two-level stacking ensemble and score it.

    ``opt`` supplies the sampled search-space values: ``"lvl_0"`` and
    ``"lvl_1"`` are lists of base classifiers, ``"top"`` is the
    meta-classifier of the innermost stack.

    Returns the mean 3-fold cross-validation score on the module-level
    breast-cancer data (X, y).
    """
    inner_stack = StackingClassifier(
        classifiers=opt["lvl_0"], meta_classifier=opt["top"]
    )
    outer_stack = StackingClassifier(
        classifiers=opt["lvl_1"], meta_classifier=inner_stack
    )
    return cross_val_score(outer_stack, X, y, cv=3).mean()
74
75
76
# helper function to create search space dimensions
77
# helper function to create search space dimensions
def get_combinations(models, comb_len=2):
    """Return all combinations of *models* with at least ``comb_len`` elements.

    Example:
        get_combinations([1, 2, 3], comb_len=2)
        -> [[1, 2], [1, 3], [2, 3], [1, 2, 3]]

    Parameters
    ----------
    models : sequence of hashable objects
        Candidate elements (here: classifier instances).
    comb_len : int, optional
        Minimum size of each returned combination (default 2).

    Returns
    -------
    list of list
        Combinations ordered by increasing size, each preserving the
        input's element order — the same output (and order) the previous
        permutations-based implementation produced.
    """
    comb_list = []
    # Guards against duplicate entries in *models*; with distinct elements
    # itertools.combinations already yields each subset exactly once, so
    # this replaces the original O(n!) permutations + set-dedup scan.
    seen = set()
    for size in range(comb_len, len(models) + 1):
        for subset in itertools.combinations(models, size):
            key = frozenset(subset)
            if key in seen:
                continue  # only reachable when *models* holds duplicates
            seen.add(key)
            comb_list.append(list(subset))
    return comb_list
93
94
95
# Candidate pools for each layer of the stacking ensemble.
top_candidates = [lr, dtc, gnb, rc]
bottom_pool = [gpc, dtc, mlp, gnb, knn]
middle_pool = [gbc, rfc, etc]

# Every combination with >= 2 models becomes one discrete choice per layer.
lvl_0_choices = get_combinations(bottom_pool)
lvl_1_choices = get_combinations(middle_pool)

search_space = {
    "lvl_1": lvl_1_choices,
    "lvl_0": lvl_0_choices,
    "top": top_candidates,
}

hyper = Hyperactive()
hyper.add_search(stacking, search_space, n_iter=20)
hyper.run()
111