ensemble_learning_example.get_combinations() - Code Metrics - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

ensemble_learning_example.get_combinations() B
last analyzed 2025-08-16 19:01 UTC

↳ Parent: ensemble_learning_example

Complexity

Conditions

Size

Total Lines	26
Code Lines	20

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	8
eloc	20
nop	2
dl	0
loc	26
rs	7.3333
c	0
b	0
f	0

"""
This example shows how you can search for the best models in each layer in a
stacking ensemble.

We want to create a stacking ensemble with 3 layers:
    - a top layer with one model
    - a middle layer with multiple models
    - a bottom layer with multiple models

We also want to know how many models should be used in the middle and bottom layer.
For that we can use the helper function "get_combinations". It works as follows:

input = [1, 2 , 3]
output = get_combinations(input, comb_len=2)
output: [[1, 2], [1, 3], [2, 3], [1, 2, 3]]

Instead of numbers we insert models into "input". This way we get each combination
with at least 2 elements. Only 1 model per layer would not make much sense.

The ensemble itself is created via the package "mlxtend" in the objective-function "stacking".
"""

import itertools

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from mlxtend.classifier import StackingClassifier

from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
    ExtraTreesClassifier,
)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier

from hyperactive import Hyperactive

# Load the breast cancer dataset once at import time; X and y are read as
# module-level globals by the objective function "stacking".
data = load_breast_cancer()
X, y = data.data, data.target

# define models that are used in search space
# (gbc, rfc and etc are later collected in "models_1")
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()
etc = ExtraTreesClassifier()

# (gpc, dtc, mlp, gnb and knn are later collected in "models_0";
# dtc and gnb are also returned by dtc_f / gnb_f for the "top" dimension)
mlp = MLPClassifier()
gnb = GaussianNB()
gpc = GaussianProcessClassifier()
dtc = DecisionTreeClassifier()
knn = KNeighborsClassifier()

# linear models, returned by lr_f / rc_f for the "top" dimension
lr = LogisticRegression()
rc = RidgeClassifier()


def stacking(opt):
    """Objective function: score one nested stacking ensemble.

    ``opt`` maps the search-space keys "lvl_1", "lvl_0" and "top" to
    zero-argument functions. The classifiers from "lvl_0" are stacked with
    the "top" model as meta-classifier; that stack is then used as the
    meta-classifier of a second stack built from the "lvl_1" classifiers.

    Returns the mean of the 3-fold cross-validation scores of the outer
    stack on the module-level data ``X``, ``y``.
    """
    # Resolve the selected getters in the same order as the search-space keys.
    outer_models = opt["lvl_1"]()
    inner_models = opt["lvl_0"]()
    meta_model = opt["top"]()

    inner_stack = StackingClassifier(
        classifiers=inner_models, meta_classifier=meta_model
    )
    outer_stack = StackingClassifier(
        classifiers=outer_models, meta_classifier=inner_stack
    )

    return cross_val_score(outer_stack, X, y, cv=3).mean()


# helper function to create search space dimensions
def get_combinations(models, comb_len=2):
    """Wrap every combination of ``models`` in a zero-argument function.

    All order-independent combinations containing at least ``comb_len``
    elements are generated, shortest first. Combinations that consist of the
    same set of elements as an earlier one are skipped. Each remaining
    combination is wrapped in a function that returns it as a list, so the
    result can be used directly as a search-space dimension.

    Parameters
    ----------
    models : sequence
        Hashable objects (e.g. estimator instances) to combine.
    comb_len : int, default=2
        Minimum number of elements a combination must contain.

    Returns
    -------
    list of callable
        One function per combination. Calling it returns the combination as
        a list; its ``__name__`` is ``"<len(models)>___<str(combination)>"``.
    """

    def _make_getter(combination):
        # A factory gives every getter its own binding. Defining the getter
        # directly inside the loop would make all getters share the loop
        # variable and therefore return the last combination.
        def _func_():
            return combination

        return _func_

    # Name prefix is the number of input models, kept so that the generated
    # function names stay unchanged.
    name_prefix = str(len(models)) + "___"

    seen = set()
    comb_list_f = []
    for size in range(len(models) + 1):
        if size < comb_len:
            continue
        # combinations() yields each index-ordered subset exactly once, which
        # avoids enumerating (and discarding) every permutation.
        for subset in itertools.combinations(models, size):
            key = frozenset(subset)
            if key in seen:
                continue
            seen.add(key)

            combination = list(subset)
            getter = _make_getter(combination)
            getter.__name__ = name_prefix + str(combination)
            comb_list_f.append(getter)

    return comb_list_f


def lr_f():
    """Return the module-level LogisticRegression instance ``lr``."""
    return lr


def dtc_f():
    """Return the module-level DecisionTreeClassifier instance ``dtc``."""
    return dtc


def gnb_f():
    """Return the module-level GaussianNB instance ``gnb``."""
    return gnb


def rc_f():
    """Return the module-level RidgeClassifier instance ``rc``."""
    return rc


# Candidate models for the two multi-model layers of the stack.
models_0 = [gpc, dtc, mlp, gnb, knn]
models_1 = [gbc, rfc, etc]

# Turn each candidate list into a list of zero-argument functions produced by
# get_combinations (default minimum combination size: 2).
stack_lvl_0_clfs = get_combinations(models_0)
stack_lvl_1_clfs = get_combinations(models_1)


# Show the generated functions for the "lvl_0" dimension.
print("\n stack_lvl_0_clfs \n", stack_lvl_0_clfs, "\n")


# Every dimension is a list of functions; "stacking" calls the selected
# function of each key to obtain the actual model(s).
search_space = {
    "lvl_1": stack_lvl_1_clfs,
    "lvl_0": stack_lvl_0_clfs,
    "top": [lr_f, dtc_f, gnb_f, rc_f],
}

# NOTE: the search itself is disabled - the calls below sit inside a bare
# string literal and are therefore never executed.
"""
hyper = Hyperactive()
hyper.add_search(stacking, search_space, n_iter=3)
hyper.run()
"""


1			"""
2			This example shows how you can search for the best models in each layer in a
3			stacking ensemble.
4
5			We want to create a stacking ensemble with 3 layers:
6			- a top layer with one model
7			- a middle layer with multiple models
8			- a bottom layer with multiple models
9
10			We also want to know how many models should be used in the middle and bottom layer.
11			For that we can use the helper function "get_combinations". It works as follows:
12
13			input = [1, 2 , 3]
14			output = get_combinations(input, comb_len=2)
15			output: [[1, 2], [1, 3], [2, 3], [1, 2, 3]]
16
17			Instead of numbers we insert models into "input". This way we get each combination
18			with more than 2 elements. Only 1 model per layer would not make much sense.
19
20			The ensemble itself is created via the package "mlxtend" in the objective-function "stacking".
21			"""
22
23			import itertools
24
25			from sklearn.datasets import load_breast_cancer
26			from sklearn.model_selection import cross_val_score
27			from mlxtend.classifier import StackingClassifier
28
29			from sklearn.ensemble import (
30			GradientBoostingClassifier,
31			RandomForestClassifier,
32			ExtraTreesClassifier,
33			)
34
35			from sklearn.neighbors import KNeighborsClassifier
36			from sklearn.neural_network import MLPClassifier
37			from sklearn.gaussian_process import GaussianProcessClassifier
38			from sklearn.tree import DecisionTreeClassifier
39			from sklearn.naive_bayes import GaussianNB
40
41			from sklearn.linear_model import LogisticRegression
42			from sklearn.linear_model import RidgeClassifier
43
44			from hyperactive import Hyperactive
45
46			data = load_breast_cancer()
47			X, y = data.data, data.target
48
49			# define models that are used in search space
50			gbc = GradientBoostingClassifier()
51			rfc = RandomForestClassifier()
52			etc = ExtraTreesClassifier()
53
54			mlp = MLPClassifier()
55			gnb = GaussianNB()
56			gpc = GaussianProcessClassifier()
57			dtc = DecisionTreeClassifier()
58			knn = KNeighborsClassifier()
59
60			lr = LogisticRegression()
61			rc = RidgeClassifier()
62
63
64			def stacking(opt):
65			lvl_1_ = opt["lvl_1"]()
66			lvl_0_ = opt["lvl_0"]()
67			top_ = opt["top"]()
68
69			stack_lvl_0 = StackingClassifier(classifiers=lvl_0_, meta_classifier=top_)
70			stack_lvl_1 = StackingClassifier(classifiers=lvl_1_, meta_classifier=stack_lvl_0)
71			scores = cross_val_score(stack_lvl_1, X, y, cv=3)
72
73			return scores.mean()
74
75
76			# helper function to create search space dimensions
77			def get_combinations(models, comb_len=2):
78			def _list_in_list_of_lists(list_, list_of_lists):
79			for list__ in list_of_lists:
80			if set(list_) == set(list__):
81			return True
82
83			comb_list = []
84			for i in range(0, len(models) + 1):
85			for subset in itertools.permutations(models, i):
86			if len(subset) < comb_len:
87			continue
88			if _list_in_list_of_lists(subset, comb_list):
89			continue
90
91			comb_list.append(list(subset))
92
93			comb_list_f = []
94			for comb_ in comb_list:
95
96			def _func_():
97			return comb_
			0 ignored issues – show introduced 2022-03-16 12:55 UTC by Report Bug Copy Issue Report The variable `comb_` does not seem to be defined in case the `for` loop on line `94` is not entered. Are you sure this can never be the case? Loading history...
98
99			_func_.__name__ = str(i) + "___" + str(comb_)
			0 ignored issues – show introduced 2022-03-16 12:55 UTC by Report Bug Copy Issue Report The variable `i` does not seem to be defined in case the `for` loop on line `84` is not entered. Are you sure this can never be the case? Loading history...
100			comb_list_f.append(_func_)
101
102			return comb_list_f
103
104
105			def lr_f():
106			return lr
107
108
109			def dtc_f():
110			return dtc
111
112
113			def gnb_f():
114			return gnb
115
116
117			def rc_f():
118			return rc
119
120
121			models_0 = [gpc, dtc, mlp, gnb, knn]
122			models_1 = [gbc, rfc, etc]
123
124			stack_lvl_0_clfs = get_combinations(models_0)
125			stack_lvl_1_clfs = get_combinations(models_1)
126
127
128			print("\n stack_lvl_0_clfs \n", stack_lvl_0_clfs, "\n")
129
130
131			search_space = {
132			"lvl_1": stack_lvl_1_clfs,
133			"lvl_0": stack_lvl_0_clfs,
134			"top": [lr_f, dtc_f, gnb_f, rc_f],
135			}
136
137			"""
138			hyper = Hyperactive()
139			hyper.add_search(stacking, search_space, n_iter=3)
140			hyper.run()
141			"""
142

SimonBlanke / Hyperactive

ensemble_learning_example.get_combinations() B last analyzed 2025-08-16 19:01 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

ensemble_learning_example.get_combinations() B
last analyzed 2025-08-16 19:01 UTC