Passed
Push — master ( 3f1f7f...e4a05a )
by Simon
02:49 queued 10s
created

hyperactive.base_optimizer.BaseOptimizer.search()   B

Complexity

Conditions 5

Size

Total Lines 34
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 34
rs 8.8133
c 0
b 0
f 0
cc 5
nop 4
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
6
import pickle
7
import numpy as np
8
import multiprocessing
9
10
from importlib import import_module
11
from functools import partial
12
13
from .base_positioner import BasePositioner
14
from .config import Config
15
from .opt_args import Arguments
16
from .sub_packages import MetaLearn
17
from .util import initialize_search, finish_search_, sort_for_best
18
19
20
class BaseOptimizer:
21
    def __init__(self, *args, **kwargs):
        """Set up the optimizer from the given configuration arguments.

        Parameters
        ----------
        search_config: dict
            A dictionary providing the model and hyperparameter search space for the
            optimization process.
        n_iter: int
            The number of iterations the optimizer performs.
        metric: string, optional (default: "accuracy")
            The metric the model is evaluated by.
        n_jobs: int, optional (default: 1)
            The number of searches to run in parallel.
        cv: int, optional (default: 5)
            The number of folds for the cross validation.
        verbosity: int, optional (default: 1)
            Verbosity level. 1 prints out warm_start points and their scores.
        random_state: int, optional (default: None)
            Sets the random seed.
        warm_start: dict, optional (default: False)
            Dictionary that defines a start point for the optimizer.
        memory: bool, optional (default: True)
            A memory, that saves the evaluation during the optimization to save time when
            optimizer returns to position.
        scatter_init: int, optional (default: False)
            Defines the number n of random positions that should be evaluated with 1/n the
            training data, to find a better initial position.

        Returns
        -------
        None
        """
        # All argument parsing and validation is delegated to the helper objects.
        self._config_ = Config(*args, **kwargs)
        self._arg_ = Arguments(**kwargs)

        # Optionally attach a meta-learning collector for this search space.
        if self._config_.meta_learn:
            self._meta_ = MetaLearn(self._config_.search_config)

        # Convenience aliases for the most frequently used settings.
        self.search_config = self._config_.search_config
        self.n_iter = self._config_.n_iter

        # Book-keeping containers for plotting the search path, if requested.
        if self._config_.get_search_path:
            self.pos_list = []
            self.score_list = []
70
71
    def _hill_climb_iteration(self, _cand_, _p_, X, y):
72
        _p_.pos_new = _p_.move_climb(_cand_, _p_.pos_current)
73
        _p_.score_new = _cand_.eval_pos(_p_.pos_new, X, y)
74
75
        if _p_.score_new > _cand_.score_best:
76
            _cand_, _p_ = self._update_pos(_cand_, _p_)
77
78
        return _cand_, _p_
79
80
    def _init_base_positioner(self, _cand_, positioner=None, pos_para={}):
81
        if positioner:
82
            _p_ = positioner(**pos_para)
83
        else:
84
            _p_ = BasePositioner(**pos_para)
85
86
        _p_.pos_current = _cand_.pos_best
87
        _p_.score_current = _cand_.score_best
88
89
        return _p_
90
91
    def _update_pos(self, _cand_, _p_):
92
        _cand_.pos_best = _p_.pos_new
93
        _cand_.score_best = _p_.score_new
94
95
        _p_.pos_current = _p_.pos_new
96
        _p_.score_current = _p_.score_new
97
98
        return _cand_, _p_
99
100
    def search(self, nth_process, X, y):
        """Run the optimization loop for one process.

        Parameters
        ----------
        nth_process : int
            Index of the (possibly parallel) search process.
        X, y : array-like
            Training data passed through to the objective evaluation.

        Returns
        -------
        (_cand_, _p_)
            The final candidate and the positioner (or list of positioners).
        """
        self._config_, _cand_ = initialize_search(self._config_, nth_process, X, y)
        _p_ = self._init_opt_positioner(_cand_, X, y)

        for i in range(self._config_.n_iter):
            _cand_ = self._iterate(i, _cand_, _p_, X, y)
            self._config_.update_p_bar(1)

            if self._config_.get_search_path:
                # Normalize to a list so single- and multi-positioner
                # optimizers are recorded identically. Building the arrays
                # once per iteration also fixes the old bug where
                # pos_list_/score_list_ were undefined for an empty list.
                positioners = _p_ if isinstance(_p_, list) else [_p_]
                self.pos_list.append(np.array([p.pos_new for p in positioners]))
                self.score_list.append(np.array([p.score_new for p in positioners]))

        _cand_ = finish_search_(self._config_, _cand_, X, y)

        return _cand_, _p_
134
135
    def _search_multiprocessing(self, X, y):
        """Wrapper for the parallel search. Passes integer that corresponds to process number.

        Returns
        -------
        list
            One candidate object per worker process.
        """
        search = partial(self.search, X=X, y=y)

        # Use the pool as a context manager so worker processes are always
        # terminated (the old code leaked the pool).
        with multiprocessing.Pool(self._config_.n_jobs) as pool:
            # pool.map returns a list of (candidate, positioner) tuples. The
            # previous `_cand_list, _p_list = pool.map(...)` unpacking only
            # worked when there were exactly two results; extract the
            # candidates explicitly so any n_jobs works.
            results = pool.map(search, self._config_._n_process_range)

        _cand_list = [cand for cand, _p_ in results]
        return _cand_list
143
144
    def _run_one_job(self, X, y):
145
        _cand_, _p_ = self.search(0, X, y)
146
        if self._config_.meta_learn:
147
            self._meta_.collect(X, y, _cand_list=[_cand_])
148
149
        self.model_best = _cand_.model
150
        self.score_best = _cand_.score_best
151
        start_point = _cand_._get_warm_start()
152
153
        if self._config_.verbosity:
154
            print("\n", self._config_.metric, self.score_best)
155
            print("start_point =", start_point)
156
157
        if self._config_.get_search_path:
158
            self._p_ = _p_
159
160
    def _run_multiple_jobs(self, X, y):
        """Run parallel searches and publish the overall best model/score.

        When verbosity is enabled, prints every worker's warm-start point,
        best first.
        """
        cand_list = self._search_multiprocessing(X, y)

        # Collect each worker's outcome.
        model_best_list = [cand.model for cand in cand_list]
        score_best_list = [cand.score_best for cand in cand_list]
        start_point_list = [cand._get_warm_start() for cand in cand_list]

        # Sort start points and models by their scores, best first.
        start_point_sorted, score_best_sorted = sort_for_best(
            start_point_list, score_best_list
        )
        model_best_sorted, score_best_sorted = sort_for_best(
            model_best_list, score_best_list
        )

        if self._config_.verbosity:
            print("\nList of start points (best first):")
            for start_point, score_best in zip(start_point_sorted, score_best_sorted):
                print("\n", self._config_.metric, score_best)
                print("start_point =", start_point)

        self.score_best = score_best_sorted[0]
        self.model_best = model_best_sorted[0]
191
192
    def fit(self, X, y):
193
        """Public method for starting the search with the training data (X, y)
194
195
        Parameters
196
        ----------
197
        X : array-like or sparse matrix of shape = [n_samples, n_features]
198
199
        y : array-like, shape = [n_samples] or [n_samples, n_outputs]
200
201
        Returns
202
        -------
203
        None
204
        """
205
        X, y = self._config_._check_data(X, y)
206
207
        if self._config_.model_type == "keras":
208
            self._config_.n_jobs = 1
209
210
        if self._config_.n_jobs == 1:
211
            self._run_one_job(X, y)
212
213
        else:
214
            self._run_multiple_jobs(X, y)
215
216
    def predict(self, X_test):
217
        """Returns the prediction of X_test after a model was searched by `fit`
218
219
        Parameters
220
        ----------
221
        X_test : array-like or sparse matrix of shape = [n_samples, n_features]
222
223
        Returns
224
        -------
225
        (unnamed array) : array-like, shape = [n_samples] or [n_samples, n_outputs]
226
        """
227
        return self.model_best.predict(X_test)
228
229
    def score(self, X_test, y_true):
230
        """Returns the score calculated from the prediction of X_test and the true values from y_test
231
232
        Parameters
233
        ----------
234
        X_test : array-like or sparse matrix of shape = [n_samples, n_features]
235
236
        y_true : array-like, shape = [n_samples] or [n_samples, n_outputs]
237
238
        Returns
239
        -------
240
        (unnamed float) : float
241
        """
242
        if self._config_.model_type in ["sklearn", "xgboost", "lightgbm", "catboost"]:
243
            module = import_module("sklearn.metrics")
244
            metric_class = getattr(module, self._config_.metric)
245
246
            y_pred = self.model_best.predict(X_test)
247
            return metric_class(y_true, y_pred)
248
        elif self._config_.model_type in ["keras"]:
249
            loss, score = self.model_best.evaluate(X_test, y_true, verbose=0)
250
            return score
251
252
        """
253
        y_pred = self.model_best.predict(X_test)
254
255
        metric_type = list(self._config_.metric.keys())[0]
256
        metric_class = self._config_.metric[metric_type]
257
258
        return metric_class(y_true, y_pred)
259
        """
260
261
    def export(self, filename):
262
        """Exports the best model, that was found by the optimizer during `fit`
263
264
        Parameters
265
        ----------
266
        filename : string or path
267
268
        Returns
269
        -------
270
        None
271
        """
272
        if self.model_best:
273
            pickle.dump(self.model_best, open(filename, "wb"))
274