Passed
Push — master (61a8e6...a7d091) by Simon, created 03:21

BaseOptimizer._search_multiprocessing()   A

Complexity
    Conditions: 1

Size
    Total Lines: 8
    Code Lines: 5

Duplication
    Lines: 0
    Ratio: 0%

Importance
    Changes: 0

Metric   Value
eloc     5
dl       0
loc      8
rs       10
c        0
b        0
f        0
cc       1
nop      3
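
The figures above (5 code lines, cyclomatic complexity 1, 3 parameters, 0% duplication) describe _search_multiprocessing() itself. As a rough local cross-check, comparable raw and complexity numbers can be computed with the radon package; the snippet below is only a sketch under the assumption that this report's metrics roughly correspond to radon's, and the file path is hypothetical.

from radon.raw import analyze
from radon.complexity import cc_visit

source = open("hyperactive/base_optimizer.py").read()  # hypothetical path to the file shown below

raw = analyze(source)                  # namedtuple with loc, lloc, sloc, comments, blank, ...
print("loc:", raw.loc, "sloc:", raw.sloc)

for block in cc_visit(source):         # top-level functions and classes
    print(block.name, block.complexity)
    for method in getattr(block, "methods", []):    # methods are listed under their class
        print(" ", method.name, method.complexity)  # _search_multiprocessing should report 1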
# Author: Simon Blanke
# Email: [email protected]
# License: MIT License

import os
import time
import numpy as np
import multiprocessing

from functools import partial

from .base_positioner import BasePositioner
from .verb import VerbosityLVL0, VerbosityLVL1, VerbosityLVL2
from .util import init_candidate, init_eval
from .candidate import Candidate
from meta_learn import HyperactiveWrapper


class BaseOptimizer:
    def __init__(self, _core_, _arg_):
        """
        Parameters
        ----------
        search_config: dict
            A dictionary providing the model and hyperparameter search space for the
            optimization process.
        n_iter: int
            The number of iterations the optimizer performs.
        metric: string, optional (default: "accuracy")
            The metric the model is evaluated by.
        n_jobs: int, optional (default: 1)
            The number of searches to run in parallel.
        cv: int, optional (default: 3)
            The number of folds for the cross validation.
        verbosity: int, optional (default: 1)
            Verbosity level. 1 prints out warm_start points and their scores.
        random_state: int, optional (default: None)
            Sets the random seed.
        warm_start: dict, optional (default: False)
            Dictionary that defines a start point for the optimizer.
        memory: bool, optional (default: True)
            A memory that saves the evaluations during the optimization, to save time when
            the optimizer returns to a position.
        scatter_init: int, optional (default: False)
            Defines the number n of random positions that should be evaluated with 1/n of the
            training data, to find a better initial position.

        Returns
        -------
        None
        """

        self._core_ = _core_
        self._arg_ = _arg_
        self._meta_ = None

        self.search_config = self._core_.search_config
        self.n_iter = self._core_.n_iter

        if self._core_.meta_learn:
            self._meta_ = HyperactiveWrapper(self._core_.search_config)

        verbs = [VerbosityLVL0, VerbosityLVL1, VerbosityLVL2]
        self._verb_ = verbs[_core_.verbosity]()

        self.pos_list = []
        self.score_list = []

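The Parameters section above documents the public search interface (search_config, n_iter, n_jobs, ...) rather than this constructor's _core_/_arg_ arguments. As a hedged illustration of that description only, a search_config might look like the sketch below; the model key and hyperparameter names are made up and are not taken from the project.

# Hypothetical sketch of the search_config dict described in the docstring above:
# it maps a model to the hyperparameter search space the optimizer explores.
search_config = {
    "some.model.Identifier": {               # hypothetical model key (string or callable, depending on the version)
        "n_estimators": range(10, 100, 10),  # candidate values per hyperparameter
        "max_depth": [3, 5, 10, None],
    },
}
# n_iter, n_jobs, cv, etc. from the docstring then configure how the search itself runs.
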
    def _init_base_positioner(self, _cand_, positioner=None):
        if positioner:
            _p_ = positioner(**self._arg_.kwargs_opt)
        else:
            _p_ = BasePositioner(**self._arg_.kwargs_opt)

        _p_.pos_current = _cand_.pos_best
        _p_.score_current = _cand_.score_best

        return _p_

    def _update_pos(self, _cand_, _p_):
        _cand_.pos_best = _p_.pos_new
        _cand_.score_best = _p_.score_new

        _p_.pos_current = _p_.pos_new
        _p_.score_current = _p_.score_new

        return _cand_, _p_

    def _initialize_search(self, _core_, nth_process, X, y):
        _cand_ = init_candidate(_core_, nth_process, Candidate)
        _cand_ = init_eval(_cand_, nth_process, X, y)
        _p_ = self._init_opt_positioner(_cand_, X, y)
        self._verb_.init_p_bar(_cand_, self._core_)

        if self._meta_:
            meta_data = self._meta_.get_func_metadata(_cand_.func_)
            if meta_data:
                _cand_._space_.load_memory(*meta_data)

        return _core_, _cand_, _p_

    def _finish_search(self, _core_, _cand_, X, y):
        _cand_.eval_pos(_cand_.pos_best, X, y, force_eval=True)
        self.eval_time = _cand_.eval_time_sum
        self._verb_.close_p_bar()

        return _cand_

    def search(self, nth_process, X, y):
        self._core_, _cand_, _p_ = self._initialize_search(
            self._core_, nth_process, X, y
        )

        for i in range(self._core_.n_iter):
            _cand_ = self._iterate(i, _cand_, _p_, X, y)
            self._verb_.update_p_bar(1, _cand_)

            run_time = time.time() - self.start_time
            if self._core_.max_time and run_time > self._core_.max_time:
                break

            # get_search_path
            if self._core_.get_search_path:
                pos_list = []
                score_list = []
                if isinstance(_p_, list):
                    for p in _p_:
                        pos_list.append(p.pos_new)
                        score_list.append(p.score_new)

                        pos_list_ = np.array(pos_list)
                        score_list_ = np.array(score_list)

                    self.pos_list.append(pos_list_)

Issue: The variable pos_list_ does not seem to be defined for all execution paths.

                    self.score_list.append(score_list_)

Issue: The variable score_list_ does not seem to be defined for all execution paths.

                else:
                    pos_list.append(_p_.pos_new)
                    score_list.append(_p_.score_new)

                    pos_list_ = np.array(pos_list)
                    score_list_ = np.array(score_list)

                    self.pos_list.append(pos_list_)
                    self.score_list.append(score_list_)

        _cand_ = self._finish_search(self._core_, _cand_, X, y)

        return _cand_

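The two issues flagged above come from the isinstance(_p_, list) branch: if _p_ were an empty list, the loop body would never run and pos_list_ / score_list_ would never be bound. One possible rewrite that keeps the non-empty behavior and always defines both arrays is sketched below; it is a suggestion, not part of the reviewed file.

        # Sketch of a possible rewrite of the get_search_path block above.
        if self._core_.get_search_path:
            pos_list = []
            score_list = []
            positioners = _p_ if isinstance(_p_, list) else [_p_]
            for p in positioners:
                pos_list.append(p.pos_new)
                score_list.append(p.score_new)

            # Convert once, after the loop, so both names are always defined.
            self.pos_list.append(np.array(pos_list))
            self.score_list.append(np.array(score_list))
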
    def _search_multiprocessing(self, X, y):
        """Wrapper for the parallel search. Passes an integer that corresponds to the process number."""
        pool = multiprocessing.Pool(self._core_.n_jobs)
        search = partial(self.search, X=X, y=y)

        _cand_list = pool.map(search, self._core_._n_process_range)

        return _cand_list

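_search_multiprocessing binds X and y with functools.partial and maps the bound callable over the process indices, so each worker runs one search. Below is a self-contained sketch of that Pool + partial pattern with generic stand-in names (run_search is not part of the project).

import multiprocessing
from functools import partial

def run_search(nth_process, X, y):
    # stand-in for self.search(nth_process, X, y): each process gets its index plus the shared data
    return nth_process, len(X), len(y)

if __name__ == "__main__":
    X, y = [1, 2, 3], [0, 1, 0]
    n_jobs = 2
    bound = partial(run_search, X=X, y=y)         # fix X and y, leave nth_process free
    with multiprocessing.Pool(n_jobs) as pool:
        results = pool.map(bound, range(n_jobs))  # one call per process index
    print(results)
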
    def _run_one_job(self, X, y):
        _cand_ = self.search(0, X, y)

        start_point = self._verb_.print_start_point(_cand_)
        self.results[_cand_.score_best] = start_point
        self.score_best = _cand_.score_best

        if self._core_.meta_learn:
            self._meta_.collect(X, y, _cand_list=[_cand_])

    def _run_multiple_jobs(self, X, y):
        _cand_list = self._search_multiprocessing(X, y)

        score_best_sorted, model_best_sorted, results = self._verb_.print_start_points(
            _cand_list, self._core_
        )

        self.results = results

        self.score_best = score_best_sorted[0]
        self.model_best = model_best_sorted[0]

    def _fit(self, X, y):
        """Public method for starting the search with the training data (X, y)

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]

        Returns
        -------
        None
        """
        self.start_time = time.time()
        self.results = {}

        if self._core_.n_jobs == 1:
            self._run_one_job(X, y)
        else:
            self._run_multiple_jobs(X, y)
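
As the docstring above notes, _fit expects X with shape [n_samples, n_features] and y with shape [n_samples] (or [n_samples, n_outputs]). A minimal illustration of those shapes with synthetic data; the optimizer instance itself is omitted, since constructing _core_ and _arg_ happens outside this file.

import numpy as np

X = np.random.rand(100, 4)             # 100 samples, 4 features
y = np.random.randint(0, 2, size=100)  # one label per sample
# A concrete optimizer subclass would then be driven via optimizer._fit(X, y)  (hypothetical call).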