Passed
Push — master ( 3cdc83...fb0693 )
by Simon
02:09 queued 10s
created

optimizer_time.collect_data()   B

Complexity

Conditions 7

Size

Total Lines 71
Code Lines 48

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 48
dl 0
loc 71
rs 7.3018
c 0
b 0
f 0
cc 7
nop 8

How to fix   Long Method    Many Parameters   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
6
import time
7
import tqdm
8
import hyperactive
9
10
import numpy as np
11
import pandas as pd
12
from sklearn.model_selection import cross_val_score
13
14
15
from hyperactive import HillClimbingOptimizer
16
from hyperactive import StochasticHillClimbingOptimizer
17
from hyperactive import TabuOptimizer
18
from hyperactive import RandomSearchOptimizer
19
from hyperactive import RandomRestartHillClimbingOptimizer
20
from hyperactive import RandomAnnealingOptimizer
21
from hyperactive import SimulatedAnnealingOptimizer
22
from hyperactive import StochasticTunnelingOptimizer
23
from hyperactive import ParallelTemperingOptimizer
24
from hyperactive import ParticleSwarmOptimizer
25
from hyperactive import EvolutionStrategyOptimizer
26
from hyperactive import BayesianOptimizer
27
28
# Version suffix for output artifacts, e.g. "_v1.0.0".
# (original wrapped the concatenation in a redundant str() call)
version = "_v" + hyperactive.__version__

#################################################################################################

# Global benchmark configuration shared by every section below.
runs = 10     # repetitions of the whole benchmark
n_iter = 100  # optimizer iterations per run

# Keyword arguments forwarded to every optimizer constructor.
opt_dict = {"cv": 3, "n_jobs": 1, "memory": False, "verbosity": 0}

# Display name -> hyperactive optimizer class for every optimizer benchmarked.
opt_list = {
    "Hill Climbing": HillClimbingOptimizer,
    "Stoch. Hill Climbing": StochasticHillClimbingOptimizer,
    "Tabu Search": TabuOptimizer,
    "Random Search": RandomSearchOptimizer,
    "Rand. Rest. Hill Climbing": RandomRestartHillClimbingOptimizer,
    "Random Annealing": RandomAnnealingOptimizer,
    "Simulated Annealing": SimulatedAnnealingOptimizer,
    "Stochastic Tunneling": StochasticTunnelingOptimizer,
    "Parallel Tempering": ParallelTemperingOptimizer,
    "Particle Swarm": ParticleSwarmOptimizer,
    "Evolution Strategy": EvolutionStrategyOptimizer,
    "Bayesian Optimization": BayesianOptimizer,
}
51
52
#################################################################################################
53
54
55
def collect_data(runs, X, y, sklearn_model, opt_list, search_config, n_iter, opt_dict):
    """Benchmark the per-iteration runtime of every optimizer in ``opt_list``.

    For each of ``runs`` repetitions: first time ``n_iter`` plain
    ``cross_val_score`` calls as a "No Opt." baseline, then time one full
    ``fit`` of each optimizer.  All timings are normalized to time per
    iteration, collected into a DataFrame (one row per run, one column per
    configuration) and written to a CSV file named after the model key in
    ``search_config``.

    Parameters
    ----------
    runs : int
        Number of repetitions of the whole benchmark.
    X, y : array-like
        Feature matrix and target vector.
    sklearn_model : estimator
        Model used for the un-optimized baseline timing.
    opt_list : dict
        Maps display name -> optimizer class.
    search_config : dict
        Hyperactive search configuration; its first key names the model.
    n_iter : int
        Iterations per optimizer (population methods use ``n_iter // 5``).
    opt_dict : dict
        Keyword arguments forwarded to each optimizer; never mutated.

    Returns
    -------
    pandas.DataFrame
        The collected timing table (also written to disk).
    """
    time_c = time.time()

    data_runs = []
    for run in range(runs):
        print("\nRun nr.", run, "\n")

        time_opt = [_time_baseline(X, y, sklearn_model, n_iter, opt_dict)]

        for key in opt_list:
            print("\n optimizer:", key)
            time_opt.append(
                _time_optimizer(key, opt_list[key], X, y, search_config, n_iter, opt_dict)
            )

        # Normalize each measurement to time per iteration.
        data_runs.append(np.array(time_opt) / n_iter)

    data_runs = np.array(data_runs)
    print("\nCreate Dataframe\n")

    print("data_runs", data_runs, data_runs.shape)

    column_names = ["No Opt."] + list(opt_list.keys())
    data = pd.DataFrame(data_runs, columns=column_names)

    model_name = list(search_config.keys())[0]

    file_name = "optimizer_calc_time_" + model_name
    data.to_csv(file_name, index=False)

    print("data collecting time:", time.time() - time_c)
    return data


def _time_baseline(X, y, sklearn_model, n_iter, opt_dict):
    """Time n_iter cross-validation evaluations without any optimizer."""
    start = time.perf_counter()
    for _ in tqdm.tqdm(range(n_iter)):
        cross_val_score(
            sklearn_model,
            X,
            y,
            scoring="accuracy",
            n_jobs=opt_dict["n_jobs"],
            cv=opt_dict["cv"],
        )
    return time.perf_counter() - start


def _time_optimizer(key, opt_class, X, y, search_config, n_iter, opt_dict):
    """Time one optimizer fit; population-based methods get n_iter/5 iterations."""
    n_iter_temp = n_iter
    # BUG FIX: the original aliased opt_dict instead of copying it, so the
    # per-optimizer extras below leaked into every subsequent optimizer and run.
    opt_dict_temp = dict(opt_dict)

    if key == "Parallel Tempering":
        n_iter_temp = int(n_iter / 5)
        opt_dict_temp["system_temps"] = [0.1, 0.2, 0.01, 0.2, 0.01]

    if key == "Particle Swarm":
        n_iter_temp = int(n_iter / 5)
        opt_dict_temp["n_part"] = 5

    if key == "Evolution Strategy":
        n_iter_temp = int(n_iter / 5)
        opt_dict_temp["individuals"] = 5

    opt_obj = opt_class(search_config, n_iter_temp, **opt_dict_temp)

    start = time.perf_counter()
    opt_obj.fit(X, y)
    return time.perf_counter() - start
126
127
128
#################################################################################################
129
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

# Benchmark section: KNN on the iris dataset.
iris_data = load_iris()
iris_X, iris_y = iris_data.data, iris_data.target

KNN = KNeighborsClassifier()

search_config_KNN = {
    "sklearn.neighbors.KNeighborsClassifier": {"n_neighbors": range(5, 7), "p": [1, 2]}
}

data_runs = collect_data(
    runs=runs,
    X=iris_X,
    y=iris_y,
    sklearn_model=KNN,
    opt_list=opt_list,
    search_config=search_config_KNN,
    n_iter=n_iter,
    opt_dict=opt_dict,
)
151
152
#################################################################################################
153
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier

# Benchmark section: gradient boosting on the breast-cancer dataset.
cancer_data = load_breast_cancer()
cancer_X, cancer_y = cancer_data.data, cancer_data.target

GBC = GradientBoostingClassifier()

search_config_GBC = {
    "sklearn.ensemble.GradientBoostingClassifier": {
        "n_estimators": range(99, 102),
        "max_depth": range(3, 4),
    }
}

data_runs = collect_data(
    runs=runs,
    X=cancer_X,
    y=cancer_y,
    sklearn_model=GBC,
    opt_list=opt_list,
    search_config=search_config_GBC,
    n_iter=n_iter,
    opt_dict=opt_dict,
)
178
179
#################################################################################################
180
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier

# Benchmark section: decision tree on the breast-cancer dataset.
cancer_data = load_breast_cancer()
cancer_X, cancer_y = cancer_data.data, cancer_data.target

DTC = DecisionTreeClassifier()

search_config_DTC = {
    "sklearn.tree.DecisionTreeClassifier": {
        "min_samples_split": [2, 3],
        "min_samples_leaf": [1, 2],
    }
}

data_runs = collect_data(
    runs=runs,
    X=cancer_X,
    y=cancer_y,
    sklearn_model=DTC,
    opt_list=opt_list,
    search_config=search_config_DTC,
    n_iter=n_iter,
    opt_dict=opt_dict,
)
205
206
#################################################################################################
207
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier

# NOTE(review): this section is an exact duplicate of the GradientBoosting
# benchmark above and overwrites the same output CSV — confirm it is intentional.
cancer_data = load_breast_cancer()
cancer_X, cancer_y = cancer_data.data, cancer_data.target

GBC = GradientBoostingClassifier()

search_config_GBC = {
    "sklearn.ensemble.GradientBoostingClassifier": {
        "n_estimators": range(99, 102),
        "max_depth": range(3, 4),
    }
}

data_runs = collect_data(
    runs=runs,
    X=cancer_X,
    y=cancer_y,
    sklearn_model=GBC,
    opt_list=opt_list,
    search_config=search_config_GBC,
    n_iter=n_iter,
    opt_dict=opt_dict,
)
232
233
#################################################################################################
234
from sklearn.datasets import load_breast_cancer
from lightgbm import LGBMClassifier

# Benchmark section: LightGBM on the breast-cancer dataset.
cancer_data = load_breast_cancer()
cancer_X, cancer_y = cancer_data.data, cancer_data.target

LGBMC = LGBMClassifier()

search_config_LGBMC = {
    "lightgbm.LGBMClassifier": {"num_leaves": [31, 32], "n_estimators": [100, 101]}
}

data_runs = collect_data(
    runs=runs,
    X=cancer_X,
    y=cancer_y,
    sklearn_model=LGBMC,
    opt_list=opt_list,
    search_config=search_config_LGBMC,
    n_iter=n_iter,
    opt_dict=opt_dict,
)
256