Passed: Push to master (ccc735...5b9e9c) by Simon, created 01:52

optimizer_time.collect_data() (grade B)

Complexity: Conditions 7
Size: Total Lines 71, Code Lines 48
Duplication: Lines 0, Ratio 0 %
Importance: Changes 0
Metric  Value    Meaning
eloc    48       effective lines of code (matches Code Lines)
dl      0        duplicated lines
loc     71       total lines
rs      7.3018
c       0
b       0
f       0
cc      7        cyclomatic complexity (matches Conditions)
nop     8        number of parameters

How to fix

Long Method

Small methods make your code easier to understand, especially when combined with a good name. And when a method is small, finding a good name for it is usually much easier.

For example, if you find yourself adding comments inside a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as a starting point for naming it.

Commonly applied refactorings include Extract Method (sketched below), Replace Temp with Query, and Decompose Conditional.
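As a minimal, hypothetical sketch of Extract Method (the helper name time_optimizer_fit is not part of the analyzed code), the per-optimizer timing block inside collect_data() in the source below could become a method whose name replaces an explanatory comment:

import time

def time_optimizer_fit(opt_class, search_config, n_iter, opt_kwargs, X, y):
    # construct one optimizer and time a single fit() call
    opt_obj = opt_class(search_config, n_iter, **opt_kwargs)
    start = time.perf_counter()
    opt_obj.fit(X, y)
    return time.perf_counter() - start

The method name now states what the timed block does, which is exactly the comment-to-name transition described above.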

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to grow inconsistent as you come to need more, or different, data.

There are several approaches to avoiding long parameter lists, among them Introduce Parameter Object (sketched below), Preserve Whole Object, and Replace Parameter with Method.
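As a minimal sketch of Introduce Parameter Object (BenchmarkConfig is a hypothetical name, not part of the analyzed code), the eight arguments of collect_data() reported above (nop 8) could be bundled into one object:

from dataclasses import dataclass
from typing import Any, Dict

@dataclass
class BenchmarkConfig:
    # hypothetical parameter object for collect_data()'s eight arguments
    runs: int
    X: Any
    y: Any
    sklearn_model: Any
    opt_list: Dict[str, type]
    search_config: Dict[str, dict]
    n_iter: int
    opt_dict: Dict[str, Any]

# def collect_data(config: BenchmarkConfig): ...

The data_runs_dict_* dictionaries in the source below already group these arguments informally; a parameter object would make that grouping explicit. The analyzed source of the optimizer_time module follows.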

# Author: Simon Blanke
# Email: [email protected]
# License: MIT License


import time
import tqdm
import hyperactive

import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score


from keras.datasets import cifar10

from keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier


from hyperactive import HillClimbingOptimizer
from hyperactive import StochasticHillClimbingOptimizer
from hyperactive import TabuOptimizer
from hyperactive import RandomSearchOptimizer
from hyperactive import RandomRestartHillClimbingOptimizer
from hyperactive import RandomAnnealingOptimizer
from hyperactive import SimulatedAnnealingOptimizer
from hyperactive import StochasticTunnelingOptimizer
from hyperactive import ParallelTemperingOptimizer
from hyperactive import ParticleSwarmOptimizer
from hyperactive import EvolutionStrategyOptimizer
from hyperactive import BayesianOptimizer

# version suffix of the installed hyperactive package, e.g. "_v1.0.0"
version = "_v" + hyperactive.__version__

#################################################################################################

# benchmark settings: repeat each measurement 10 times, 100 iterations each
runs = 10
n_iter = 100

# keyword arguments shared by every optimizer
opt_dict = {"cv": 3, "n_jobs": 1, "memory": False, "verbosity": 0}

# the optimizer classes to benchmark, keyed by display name
opt_list = {
    "Hill Climbing": HillClimbingOptimizer,
    "Stoch. Hill Climbing": StochasticHillClimbingOptimizer,
    "Tabu Search": TabuOptimizer,
    "Random Search": RandomSearchOptimizer,
    "Rand. Rest. Hill Climbing": RandomRestartHillClimbingOptimizer,
    "Random Annealing": RandomAnnealingOptimizer,
    "Simulated Annealing": SimulatedAnnealingOptimizer,
    "Stochastic Tunneling": StochasticTunnelingOptimizer,
    "Parallel Tempering": ParallelTemperingOptimizer,
    "Particle Swarm": ParticleSwarmOptimizer,
    "Evolution Strategy": EvolutionStrategyOptimizer,
    "Bayesian Optimization": BayesianOptimizer,
}

#################################################################################################

def collect_data(runs, X, y, sklearn_model, opt_list, search_config, n_iter, opt_dict):
    time_c = time.time()

    data_runs = []
    for run in range(runs):
        print("\nRun nr.", run, "\n")
        time_opt = []

        # baseline: time n_iter plain cross-validations without any optimizer
        start = time.perf_counter()
        for i in tqdm.tqdm(range(n_iter)):
            # the scores are discarded; only the timing matters here
            scores = cross_val_score(
                sklearn_model,
                X,
                y,
                scoring="accuracy",
                n_jobs=opt_dict["n_jobs"],
                cv=opt_dict["cv"],
            )
        time_ = time.perf_counter() - start

        time_opt.append(time_)
        # data["No Opt"]["0"] = time_

        for key in opt_list.keys():
            print("optimizer:", key)

            n_iter_temp = n_iter
            # copy, so per-optimizer extras do not leak into later optimizers
            opt_dict_temp = dict(opt_dict)

            # population-based optimizers evaluate several candidates per
            # iteration, so reduce the iteration count accordingly
            if key == "Parallel Tempering":
                n_iter_temp = int(n_iter / 5)
                opt_dict_temp["system_temps"] = [0.1, 0.2, 0.01, 0.2, 0.01]

            if key == "Particle Swarm":
                n_iter_temp = int(n_iter / 5)
                opt_dict_temp["n_part"] = 5

            if key == "Evolution Strategy":
                n_iter_temp = int(n_iter / 5)
                opt_dict_temp["individuals"] = 5

            opt_obj = opt_list[key](search_config, n_iter_temp, **opt_dict_temp)

            start = time.perf_counter()
            opt_obj.fit(X, y)
            time_ = time.perf_counter() - start

            time_opt.append(time_)

        # normalize to average time per iteration
        time_opt = np.array(time_opt)
        time_opt = time_opt / n_iter
        # time_opt = np.expand_dims(time_opt_norm, axis=0)

        data_runs.append(time_opt)

    data_runs = np.array(data_runs)
    print("\nCreate Dataframe\n")

    print("data_runs", data_runs, data_runs.shape)

    # one column for the baseline plus one per optimizer
    column_names = ["No Opt."] + list(opt_list.keys())
    data = pd.DataFrame(data_runs, columns=column_names)

    model_name = list(search_config.keys())[0]

    file_name = "optimizer_calc_time_" + model_name
    data.to_csv(file_name, index=False)

    print("data collecting time:", time.time() - time_c)

#################################################################################################
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

iris_data = load_iris()
iris_X, iris_y = iris_data.data, iris_data.target

KNN = KNeighborsClassifier()

search_config_KNN = {"sklearn.neighbors.KNeighborsClassifier": {"n_neighbors": [5]}}

data_runs_dict_KNN = {
    "runs": runs,
    "X": iris_X,
    "y": iris_y,
    "sklearn_model": KNN,
    "opt_list": opt_list,
    "search_config": search_config_KNN,
    "n_iter": n_iter,
    "opt_dict": opt_dict,
}

# data_runs = collect_data(**data_runs_dict_KNN)

#################################################################################################
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier

cancer_data = load_breast_cancer()
cancer_X, cancer_y = cancer_data.data, cancer_data.target

DTC = DecisionTreeClassifier()

search_config_DTC = {"sklearn.tree.DecisionTreeClassifier": {"min_samples_split": [2]}}

data_runs_dict_DTC = {
    "runs": runs,
    "X": cancer_X,
    "y": cancer_y,
    "sklearn_model": DTC,
    "opt_list": opt_list,
    "search_config": search_config_DTC,
    "n_iter": n_iter,
    "opt_dict": opt_dict,
}

# data_runs = collect_data(**data_runs_dict_DTC)

#################################################################################################
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier

cancer_data = load_breast_cancer()
cancer_X, cancer_y = cancer_data.data, cancer_data.target

GBC = GradientBoostingClassifier()

search_config_GBC = {
    "sklearn.ensemble.GradientBoostingClassifier": {"n_estimators": [100]}
}

data_runs_dict_GBC = {
    "runs": runs,
    "X": cancer_X,
    "y": cancer_y,
    "sklearn_model": GBC,
    "opt_list": opt_list,
    "search_config": search_config_GBC,
    "n_iter": n_iter,
    "opt_dict": opt_dict,
}

# data_runs = collect_data(**data_runs_dict_GBC)

#################################################################################################
from sklearn.datasets import load_breast_cancer
from lightgbm import LGBMClassifier

cancer_data = load_breast_cancer()
cancer_X, cancer_y = cancer_data.data, cancer_data.target

LGBMC = LGBMClassifier(n_jobs=1)

search_config_LGBMC = {"lightgbm.LGBMClassifier": {"num_leaves": [31], "n_jobs": [1]}}

data_runs_dict_LGBMC = {
    "runs": runs,
    "X": cancer_X,
    "y": cancer_y,
    "sklearn_model": LGBMC,
    "opt_list": opt_list,
    "search_config": search_config_LGBMC,
    "n_iter": n_iter,
    "opt_dict": opt_dict,
}

# the only benchmark actually executed; note that collect_data() returns
# None and writes its results to a CSV file instead
data_runs = collect_data(**data_runs_dict_LGBMC)

#################################################################################################

(X_train, y_train), (X_test, y_test) = cifar10.load_data()

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)


def make_model():
    from keras.applications import mobilenet_v2
    from keras.models import Model

    # use a distinct local name to avoid shadowing the imported module
    base = mobilenet_v2.MobileNetV2(weights="imagenet")
    model = Model(inputs=base.input, outputs=base.output)
    model.compile(
        loss="mean_squared_error", optimizer="adam", metrics=["accuracy"]
    )

    return model


mobilenet_v2_model = KerasClassifier(build_fn=make_model, batch_size=500, epochs=3)

search_config_mobilenet_v2 = {
    "keras.compile.0": {"loss": ["categorical_crossentropy"], "optimizer": ["adam"]},
    "keras.fit.0": {"epochs": [3], "batch_size": [500], "verbose": [0]},
    "keras.applications.mobilenet_v2.1": {"weights": ["imagenet"]},
}

data_runs_dict_mobilenet_v2 = {
    "runs": runs,
    "X": X_train,
    "y": y_train,
    "sklearn_model": mobilenet_v2_model,
    "opt_list": opt_list,
    "search_config": search_config_mobilenet_v2,
    "n_iter": n_iter,
    "opt_dict": opt_dict,
}

# data_runs = collect_data(**data_runs_dict_mobilenet_v2)
274