Passed
Push — master ( 9aa23e...87bd68 )
by Simon
01:40
created

  A

Complexity

Conditions 2

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 2
nop 5
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import time
6
import numpy as np
7
import multiprocessing
8
9
from functools import partial
10
11
from .base_positioner import BasePositioner
12
from .util import initialize_search, finish_search_, sort_for_best
13
from meta_learn import HyperactiveWrapper
14
15
16
class BaseOptimizer:
    """Search loop and result bookkeeping shared by the optimizers.

    ``search`` calls ``self._init_opt_positioner`` and ``self._iterate``,
    neither of which is defined in this class, so subclasses are expected
    to provide them.
    """

    def __init__(self, _core_, _arg_):
        """Store the configuration objects and reset the search-path buffers.

        Parameters
        ----------
        _core_: object
            Configuration object. This class reads ``search_config``,
            ``n_iter``, ``meta_learn``, ``max_time``, ``get_search_path``,
            ``n_jobs``, ``verbosity`` and ``_n_process_range`` from it and
            calls its ``update_p_bar`` method.
        _arg_: object
            Argument container. Its ``kwargs_opt`` mapping is unpacked into
            the positioner constructor in ``_init_base_positioner``.

        Notes
        -----
        The original documentation of this method lists the user-facing
        settings below. They are presumably carried by ``_core_``; only
        ``search_config``, ``n_iter``, ``n_jobs`` and ``verbosity`` are
        read directly in this class -- TODO confirm the rest against the
        code that builds ``_core_``.

        search_config: dict
            A dictionary providing the model and hyperparameter search space for the
            optimization process.
        n_iter: int
            The number of iterations the optimizer performs.
        metric: string, optional (default: "accuracy")
            The metric the model is evaluated by.
        n_jobs: int, optional (default: 1)
            The number of searches to run in parallel.
        cv: int, optional (default: 3)
            The number of folds for the cross validation.
        verbosity: int, optional (default: 1)
            Verbosity level. 1 prints out warm_start points and their scores.
        random_state: int, optional (default: None)
            Sets the random seed.
        warm_start: dict, optional (default: False)
            Dictionary that defines a start point for the optimizer.
        memory: bool, optional (default: True)
            A memory, that saves the evaluation during the optimization to save time when
            optimizer returns to position.
        scatter_init: int, optional (default: False)
            Defines the number n of random positions that should be evaluated with 1/n the
            training data, to find a better initial position.

        Returns
        -------
        None

        """

        self._core_ = _core_
        self._arg_ = _arg_

        self.search_config = self._core_.search_config
        self.n_iter = self._core_.n_iter

        # ``_meta_`` only exists when meta learning is switched on.
        if self._core_.meta_learn:
            self._meta_ = HyperactiveWrapper(self._core_.search_config)

        # One entry is appended per recorded iteration (see ``search``).
        self.pos_list = []
        self.score_list = []

    def _init_base_positioner(self, _cand_, positioner=None):
        """Create a positioner that starts at the candidate's best position.

        Parameters
        ----------
        _cand_: object
            Candidate providing ``pos_best`` and ``score_best``.
        positioner: callable, optional (default: None)
            Positioner class to instantiate; ``BasePositioner`` is used when
            this is falsy.

        Returns
        -------
        The new positioner, built with ``self._arg_.kwargs_opt``.
        """
        positioner_cls = positioner if positioner else BasePositioner
        _p_ = positioner_cls(**self._arg_.kwargs_opt)

        _p_.pos_current = _cand_.pos_best
        _p_.score_current = _cand_.score_best

        return _p_

    def _update_pos(self, _cand_, _p_):
        """Accept the positioner's new position as best and current position.

        Returns
        -------
        tuple
            The updated ``(_cand_, _p_)`` pair (both are modified in place).
        """
        _cand_.pos_best = _p_.pos_new
        _cand_.score_best = _p_.score_new

        _p_.pos_current = _p_.pos_new
        _p_.score_current = _p_.score_new

        return _cand_, _p_

    def _record_search_path(self, _p_):
        """Append the newest position(s) and score(s) to the search path.

        ``_p_`` is either a single positioner or a list of positioners.
        Each call appends one array to ``self.pos_list`` and one to
        ``self.score_list``, with one row per positioner.
        """
        positioners = _p_ if isinstance(_p_, list) else [_p_]

        # The arrays are built once, after all positioners were collected.
        # Converting inside the loop left the names unbound for an empty
        # list and redid the conversion for every positioner.
        self.pos_list.append(np.array([p.pos_new for p in positioners]))
        self.score_list.append(np.array([p.score_new for p in positioners]))

    def search(self, nth_process, X, y):
        """Run the optimization loop of one process.

        Parameters
        ----------
        nth_process: int
            Process number, passed on to ``initialize_search``.
        X, y:
            Training data, passed on to the helpers and to ``_iterate``.

        Returns
        -------
        The candidate returned by ``finish_search_``.
        """
        self._core_, _cand_ = initialize_search(self._core_, nth_process, X, y)
        _p_ = self._init_opt_positioner(_cand_, X, y)

        for i in range(self._core_.n_iter):
            _cand_ = self._iterate(i, _cand_, _p_, X, y)
            self._core_.update_p_bar(1, _cand_)

            # ``start_time`` is set by ``_fit``. The time limit is checked
            # before the path is recorded, so the iteration that exceeds
            # ``max_time`` is not added to the search path.
            run_time = time.time() - self.start_time
            if self._core_.max_time and run_time > self._core_.max_time:
                break

            if self._core_.get_search_path:
                self._record_search_path(_p_)

        _cand_ = finish_search_(self._core_, _cand_, X, y)

        return _cand_

    def _search_multiprocessing(self, X, y):
        """Wrapper for the parallel search. Passes integer that corresponds to process number"""
        search = partial(self.search, X=X, y=y)

        # ``map`` blocks until every result is back, so the workers can be
        # released as soon as the ``with`` block is left.
        with multiprocessing.Pool(self._core_.n_jobs) as pool:
            _cand_list = pool.map(search, self._core_._n_process_range)

        return _cand_list

    def _run_one_job(self, X, y):
        """Run a single search (process number 0) and store its result.

        Sets ``self.model_best`` and ``self.score_best`` and stores the
        candidate's warm-start point in ``self.results`` under its score.
        """
        _cand_ = self.search(0, X, y)

        self.model_best = _cand_.model_best
        self.score_best = _cand_.score_best
        start_point = _cand_._get_warm_start()

        # ``results`` is keyed by score, so equal scores overwrite each other.
        self.results[self.score_best] = start_point

        if self._core_.verbosity:
            print("\nbest para =", start_point)
            print("score     =", self.score_best)

        if self._core_.meta_learn:
            self._meta_.collect(X, y, _cand_list=[_cand_])

    def _run_multiple_jobs(self, X, y):
        """Run the searches in parallel and keep the best result.

        Every candidate's warm-start point is stored in ``self.results``
        under its score; ``self.score_best`` and ``self.model_best`` are
        taken from the first entry returned by ``sort_for_best``.
        """
        # NOTE(review): unlike ``_run_one_job`` this path never calls
        # ``self._meta_.collect`` -- confirm whether that is intended.
        _cand_list = self._search_multiprocessing(X, y)

        start_point_list = []
        score_best_list = []
        model_best_list = []
        for _cand_ in _cand_list:
            model_best = _cand_.model_best
            score_best = _cand_.score_best
            start_point = _cand_._get_warm_start()

            # ``results`` is keyed by score, so equal scores overwrite each other.
            self.results[score_best] = start_point

            start_point_list.append(start_point)
            score_best_list.append(score_best)
            model_best_list.append(model_best)

        start_point_sorted, score_best_sorted = sort_for_best(
            start_point_list, score_best_list
        )

        model_best_sorted, score_best_sorted = sort_for_best(
            model_best_list, score_best_list
        )

        if self._core_.verbosity:
            # Blank lines before the summary, one per two jobs --
            # presumably to clear the progress bars; TODO confirm.
            for _ in range(int(self._core_.n_jobs / 2)):
                print("\n")
            print("\nList of start points (best first):\n")
            for start_point, score_best in zip(start_point_sorted, score_best_sorted):

                print("best para =", start_point)
                print("score     =", score_best, "\n")

        self.score_best = score_best_sorted[0]
        self.model_best = model_best_sorted[0]

    def _fit(self, X, y):
        """Start the search with the training data (X, y).

        Resets ``self.start_time`` and ``self.results`` and then runs one
        search in this process when ``n_jobs == 1``, or the parallel
        search otherwise.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]

        Returns
        -------
        None
        """
        self.start_time = time.time()
        self.results = {}

        if self._core_.n_jobs == 1:
            self._run_one_job(X, y)
        else:
            self._run_multiple_jobs(X, y)
208