Passed
Push — master ( 507ec3...bdda8d )
by Simon
01:36
created

BaseOptimizer.__init__()   A

Complexity

Conditions 3

Size

Total Lines 49
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 49
rs 9.9
c 0
b 0
f 0
cc 3
nop 3
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
6
import numpy as np
7
import multiprocessing
8
9
from functools import partial
10
11
from .base_positioner import BasePositioner
12
from .util import initialize_search, finish_search_, sort_for_best
13
from meta_learn import HyperactiveWrapper
14
15
16
class BaseOptimizer:
    """Base class providing the search loop shared by the optimizers.

    ``search`` calls ``self._init_opt_positioner`` and ``self._iterate``,
    neither of which is defined here; they are presumably supplied by the
    concrete optimizer subclasses (TODO confirm).
    """

    def __init__(self, _config_, _arg_):
        """Store the configuration objects and set up optional state.

        Parameters
        ----------
        _config_ : object
            Configuration object. Within this class the attributes
            ``search_config``, ``n_iter``, ``meta_learn``, ``get_search_path``,
            ``n_jobs``, ``verbosity`` and ``_n_process_range`` are read and
            the method ``update_p_bar`` is called.
        _arg_ : object
            Stored as ``self._arg_``; not used by this class itself.

        Returns
        -------
        None

        Notes
        -----
        The following user-facing options were documented here originally.
        They are presumably carried by ``_config_`` (TODO confirm against the
        configuration class):

        search_config: dict
            A dictionary providing the model and hyperparameter search space
            for the optimization process.
        n_iter: int
            The number of iterations the optimizer performs.
        metric: string, optional (default: "accuracy")
            The metric the model is evaluated by.
        n_jobs: int, optional (default: 1)
            The number of searches to run in parallel.
        cv: int, optional (default: 3)
            The number of folds for the cross validation.
        verbosity: int, optional (default: 1)
            Verbosity level. 1 prints out warm_start points and their scores.
        random_state: int, optional (default: None)
            Sets the random seed.
        warm_start: dict, optional (default: False)
            Dictionary that defines a start point for the optimizer.
        memory: bool, optional (default: True)
            A memory that saves the evaluations during the optimization to
            save time when the optimizer returns to a position.
        scatter_init: int, optional (default: False)
            Defines the number n of random positions that should be evaluated
            with 1/n of the training data, to find a better initial position.
        """
        self._config_ = _config_
        self._arg_ = _arg_

        self.search_config = self._config_.search_config
        self.n_iter = self._config_.n_iter

        if self._config_.meta_learn:
            self._meta_ = HyperactiveWrapper(self._config_.search_config)

        # The path containers only exist when path recording was requested.
        if self._config_.get_search_path:
            self.pos_list = []
            self.score_list = []

    def _hill_climb_iteration(self, _cand_, _p_, X, y):
        """Propose one climb move, evaluate it and keep it if it is a new best.

        Parameters
        ----------
        _cand_ : object
            Candidate providing ``eval_pos`` and ``score_best``.
        _p_ : object
            Positioner providing ``move_climb`` and ``pos_current``.
        X, y :
            Training data, forwarded unchanged to ``_cand_.eval_pos``.

        Returns
        -------
        tuple
            ``(_cand_, _p_)`` after the (possibly applied) update.
        """
        _p_.pos_new = _p_.move_climb(_cand_, _p_.pos_current)
        _p_.score_new = _cand_.eval_pos(_p_.pos_new, X, y)

        # Only a strictly better score replaces the current best.
        if _p_.score_new > _cand_.score_best:
            _cand_, _p_ = self._update_pos(_cand_, _p_)

        return _cand_, _p_

    def _init_base_positioner(self, _cand_, positioner=None, pos_para=None):
        """Create a positioner that starts at the candidate's best position.

        Parameters
        ----------
        _cand_ : object
            Candidate providing ``pos_best`` and ``score_best``.
        positioner : callable, optional (default: None)
            Factory called with ``**pos_para``. ``BasePositioner`` is used
            when this is falsy.
        pos_para : dict, optional (default: None)
            Keyword arguments for the positioner; ``None`` means no
            arguments.

        Returns
        -------
        object
            The new positioner with ``pos_current`` and ``score_current``
            set from the candidate.
        """
        # ``None`` instead of ``{}`` as default avoids sharing one dict
        # between all calls.
        if pos_para is None:
            pos_para = {}

        if positioner:
            _p_ = positioner(**pos_para)
        else:
            _p_ = BasePositioner(**pos_para)

        _p_.pos_current = _cand_.pos_best
        _p_.score_current = _cand_.score_best

        return _p_

    def _update_pos(self, _cand_, _p_):
        """Accept the positioner's new position as current and as best.

        Returns
        -------
        tuple
            The same ``(_cand_, _p_)`` objects, mutated in place.
        """
        _cand_.pos_best = _p_.pos_new
        _cand_.score_best = _p_.score_new

        _p_.pos_current = _p_.pos_new
        _p_.score_current = _p_.score_new

        return _cand_, _p_

    def _record_search_path(self, _p_):
        """Append the newest position(s) and score(s) to the search path.

        Parameters
        ----------
        _p_ : object or list
            A single positioner or a list of positioners, each providing
            ``pos_new`` and ``score_new``.

        Returns
        -------
        None
        """
        # Treat a single positioner like a one-element list. An empty list
        # yields empty arrays instead of hitting undefined local variables.
        positioners = _p_ if isinstance(_p_, list) else [_p_]

        self.pos_list.append(np.array([p.pos_new for p in positioners]))
        self.score_list.append(np.array([p.score_new for p in positioners]))

    def search(self, nth_process, X, y):
        """Run the full optimization loop for one process.

        Parameters
        ----------
        nth_process : int
            Number of the process, forwarded to ``initialize_search``.
        X, y :
            Training data, forwarded to the helpers and to ``_iterate``.

        Returns
        -------
        object
            The candidate returned by ``finish_search_``.
        """
        self._config_, _cand_ = initialize_search(self._config_, nth_process, X, y)
        _p_ = self._init_opt_positioner(_cand_, X, y)

        for i in range(self._config_.n_iter):
            _cand_ = self._iterate(i, _cand_, _p_, X, y)
            self._config_.update_p_bar(1, _cand_)

            if self._config_.get_search_path:
                self._record_search_path(_p_)

        _cand_ = finish_search_(self._config_, _cand_, X, y)

        return _cand_

    def _search_multiprocessing(self, X, y):
        """Wrapper for the parallel search. Passes integer that corresponds to process number

        Returns
        -------
        list
            One candidate per entry of ``self._config_._n_process_range``.
        """
        search = partial(self.search, X=X, y=y)

        # ``map`` blocks until every result is available, so shutting the
        # pool down on leaving the ``with`` block cannot cut work short and
        # the worker processes are not leaked.
        # NOTE(review): ``search`` runs in worker processes here, so search
        # path data appended there presumably never reaches this instance.
        with multiprocessing.Pool(self._config_.n_jobs) as pool:
            _cand_list = pool.map(search, self._config_._n_process_range)

        return _cand_list

    def _run_one_job(self, X, y):
        """Run a single search in this process and store its best result.

        Sets ``self.model_best`` and ``self.score_best``; prints the score
        and start point when verbosity is enabled and hands the candidate to
        the meta-learn wrapper when ``meta_learn`` is enabled.
        """
        _cand_ = self.search(0, X, y)

        self.model_best = _cand_.model_best
        self.score_best = _cand_.score_best
        start_point = _cand_._get_warm_start()

        if self._config_.verbosity:
            print("\nscore       =", self.score_best)
            print("start_point =", start_point)

        if self._config_.meta_learn:
            self._meta_.collect(X, y, _cand_list=[_cand_])

    def _run_multiple_jobs(self, X, y):
        """Run the searches in parallel and store the best overall result.

        Sets ``self.score_best`` and ``self.model_best`` to the first entry
        of the lists returned by ``sort_for_best``.
        """
        _cand_list = self._search_multiprocessing(X, y)

        start_point_list = []
        score_best_list = []
        model_best_list = []
        for _cand_ in _cand_list:
            model_best = _cand_.model_best
            score_best = _cand_.score_best
            start_point = _cand_._get_warm_start()

            start_point_list.append(start_point)
            score_best_list.append(score_best)
            model_best_list.append(model_best)

        start_point_sorted, score_best_sorted = sort_for_best(
            start_point_list, score_best_list
        )

        model_best_sorted, score_best_sorted = sort_for_best(
            model_best_list, score_best_list
        )

        if self._config_.verbosity:
            # Blank lines, presumably to move the output below the progress
            # bars of the parallel jobs (TODO confirm).
            for _ in range(int(self._config_.n_jobs / 2)):
                print("\n")
            print("\nList of start points (best first):\n")
            for start_point, score_best in zip(start_point_sorted, score_best_sorted):
                print("score       =", score_best)
                print("start_point =", start_point, "\n")

        self.score_best = score_best_sorted[0]
        self.model_best = model_best_sorted[0]

    def _fit(self, X, y):
        """Start the search with the training data (X, y).

        Runs a single in-process search when ``n_jobs`` equals 1 and the
        multiprocessing variant otherwise.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]

        Returns
        -------
        None
        """
        if self._config_.n_jobs == 1:
            self._run_one_job(X, y)
        else:
            self._run_multiple_jobs(X, y)
206