Passed
Push — master (61a8e6...a7d091) by Simon, created 03:21

BaseOptimizer._search_multiprocessing()   A

Complexity
    Conditions: 1

Size
    Total Lines: 8
    Code Lines: 5

Duplication
    Lines: 0
    Ratio: 0%

Importance
    Changes: 0

Metric   Value
eloc     5
dl       0
loc      8
rs       10
c        0
b        0
f        0
cc       1
nop      3
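
The figures above (5 code lines, cyclomatic complexity 1, 3 parameters, 0% duplication) describe _search_multiprocessing() itself. As a rough local cross-check, comparable raw and complexity numbers can be computed with the radon package; the snippet below is only a sketch under the assumption that this report's metrics roughly correspond to radon's, and the file path is hypothetical.

from radon.raw import analyze
from radon.complexity import cc_visit

source = open("hyperactive/base_optimizer.py").read()  # hypothetical path to the file shown below

raw = analyze(source)                  # namedtuple with loc, lloc, sloc, comments, blank, ...
print("loc:", raw.loc, "sloc:", raw.sloc)

for block in cc_visit(source):         # top-level functions and classes
    print(block.name, block.complexity)
    for method in getattr(block, "methods", []):    # methods are listed under their class
        print(" ", method.name, method.complexity)  # _search_multiprocessing should report 1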
# Author: Simon Blanke
# Email: [email protected]
# License: MIT License

import os
import time
import numpy as np
import multiprocessing

from functools import partial

from .base_positioner import BasePositioner
from .verb import VerbosityLVL0, VerbosityLVL1, VerbosityLVL2
from .util import init_candidate, init_eval
from .candidate import Candidate
from meta_learn import HyperactiveWrapper


class BaseOptimizer:
    def __init__(self, _core_, _arg_):
        """
        Parameters
        ----------
        search_config: dict
            A dictionary providing the model and hyperparameter search space for the
            optimization process.
        n_iter: int
            The number of iterations the optimizer performs.
        metric: string, optional (default: "accuracy")
            The metric the model is evaluated by.
        n_jobs: int, optional (default: 1)
            The number of searches to run in parallel.
        cv: int, optional (default: 3)
            The number of folds for the cross validation.
        verbosity: int, optional (default: 1)
            Verbosity level. 1 prints out warm_start points and their scores.
        random_state: int, optional (default: None)
            Sets the random seed.
        warm_start: dict, optional (default: False)
            Dictionary that defines a start point for the optimizer.
        memory: bool, optional (default: True)
            A memory that saves the evaluations during the optimization, to save time when
            the optimizer returns to a position.
        scatter_init: int, optional (default: False)
            Defines the number n of random positions that should be evaluated with 1/n of the
            training data, to find a better initial position.

        Returns
        -------
        None
        """

        self._core_ = _core_
        self._arg_ = _arg_
        self._meta_ = None

        self.search_config = self._core_.search_config
        self.n_iter = self._core_.n_iter

        if self._core_.meta_learn:
            self._meta_ = HyperactiveWrapper(self._core_.search_config)

        verbs = [VerbosityLVL0, VerbosityLVL1, VerbosityLVL2]
        self._verb_ = verbs[_core_.verbosity]()

        self.pos_list = []
        self.score_list = []

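The Parameters section above documents the public search interface (search_config, n_iter, n_jobs, ...) rather than this constructor's _core_/_arg_ arguments. As a hedged illustration of that description only, a search_config might look like the sketch below; the model key and hyperparameter names are made up and are not taken from the project.

# Hypothetical sketch of the search_config dict described in the docstring above:
# it maps a model to the hyperparameter search space the optimizer explores.
search_config = {
    "some.model.Identifier": {               # hypothetical model key (string or callable, depending on the version)
        "n_estimators": range(10, 100, 10),  # candidate values per hyperparameter
        "max_depth": [3, 5, 10, None],
    },
}
# n_iter, n_jobs, cv, etc. from the docstring then configure how the search itself runs.
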
    def _init_base_positioner(self, _cand_, positioner=None):
        if positioner:
            _p_ = positioner(**self._arg_.kwargs_opt)
        else:
            _p_ = BasePositioner(**self._arg_.kwargs_opt)

        _p_.pos_current = _cand_.pos_best
        _p_.score_current = _cand_.score_best

        return _p_

    def _update_pos(self, _cand_, _p_):
        _cand_.pos_best = _p_.pos_new
        _cand_.score_best = _p_.score_new

        _p_.pos_current = _p_.pos_new
        _p_.score_current = _p_.score_new

        return _cand_, _p_

    def _initialize_search(self, _core_, nth_process, X, y):
        _cand_ = init_candidate(_core_, nth_process, Candidate)
        _cand_ = init_eval(_cand_, nth_process, X, y)
        _p_ = self._init_opt_positioner(_cand_, X, y)
        self._verb_.init_p_bar(_cand_, self._core_)

        if self._meta_:
            meta_data = self._meta_.get_func_metadata(_cand_.func_)
            if meta_data:
                _cand_._space_.load_memory(*meta_data)

        return _core_, _cand_, _p_

    def _finish_search(self, _core_, _cand_, X, y):
        _cand_.eval_pos(_cand_.pos_best, X, y, force_eval=True)
        self.eval_time = _cand_.eval_time_sum
        self._verb_.close_p_bar()

        return _cand_

    def search(self, nth_process, X, y):
        self._core_, _cand_, _p_ = self._initialize_search(
            self._core_, nth_process, X, y
        )

        for i in range(self._core_.n_iter):
            _cand_ = self._iterate(i, _cand_, _p_, X, y)
            self._verb_.update_p_bar(1, _cand_)

            run_time = time.time() - self.start_time
            if self._core_.max_time and run_time > self._core_.max_time:
                break

            # get_search_path
            if self._core_.get_search_path:
                pos_list = []
                score_list = []
                if isinstance(_p_, list):
                    for p in _p_:
                        pos_list.append(p.pos_new)
                        score_list.append(p.score_new)

                        pos_list_ = np.array(pos_list)
                        score_list_ = np.array(score_list)

                    self.pos_list.append(pos_list_)

Issue: The variable pos_list_ does not seem to be defined for all execution paths.

                    self.score_list.append(score_list_)

Issue: The variable score_list_ does not seem to be defined for all execution paths.

                else:
                    pos_list.append(_p_.pos_new)
                    score_list.append(_p_.score_new)

                    pos_list_ = np.array(pos_list)
                    score_list_ = np.array(score_list)

                    self.pos_list.append(pos_list_)
                    self.score_list.append(score_list_)

        _cand_ = self._finish_search(self._core_, _cand_, X, y)

        return _cand_

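The two issues flagged above come from the isinstance(_p_, list) branch: if _p_ were an empty list, the loop body would never run and pos_list_ / score_list_ would never be bound. One possible rewrite that keeps the non-empty behavior and always defines both arrays is sketched below; it is a suggestion, not part of the reviewed file.

        # Sketch of a possible rewrite of the get_search_path block above.
        if self._core_.get_search_path:
            pos_list = []
            score_list = []
            positioners = _p_ if isinstance(_p_, list) else [_p_]
            for p in positioners:
                pos_list.append(p.pos_new)
                score_list.append(p.score_new)

            # Convert once, after the loop, so both names are always defined.
            self.pos_list.append(np.array(pos_list))
            self.score_list.append(np.array(score_list))
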
    def _search_multiprocessing(self, X, y):
        """Wrapper for the parallel search. Passes an integer that corresponds to the process number."""
        pool = multiprocessing.Pool(self._core_.n_jobs)
        search = partial(self.search, X=X, y=y)

        _cand_list = pool.map(search, self._core_._n_process_range)

        return _cand_list

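_search_multiprocessing binds X and y with functools.partial and maps the bound callable over the process indices, so each worker runs one search. Below is a self-contained sketch of that Pool + partial pattern with generic stand-in names (run_search is not part of the project).

import multiprocessing
from functools import partial

def run_search(nth_process, X, y):
    # stand-in for self.search(nth_process, X, y): each process gets its index plus the shared data
    return nth_process, len(X), len(y)

if __name__ == "__main__":
    X, y = [1, 2, 3], [0, 1, 0]
    n_jobs = 2
    bound = partial(run_search, X=X, y=y)         # fix X and y, leave nth_process free
    with multiprocessing.Pool(n_jobs) as pool:
        results = pool.map(bound, range(n_jobs))  # one call per process index
    print(results)
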
    def _run_one_job(self, X, y):
        _cand_ = self.search(0, X, y)

        start_point = self._verb_.print_start_point(_cand_)
        self.results[_cand_.score_best] = start_point
        self.score_best = _cand_.score_best

        if self._core_.meta_learn:
            self._meta_.collect(X, y, _cand_list=[_cand_])

    def _run_multiple_jobs(self, X, y):
        _cand_list = self._search_multiprocessing(X, y)

        score_best_sorted, model_best_sorted, results = self._verb_.print_start_points(
            _cand_list, self._core_
        )

        self.results = results

        self.score_best = score_best_sorted[0]
        self.model_best = model_best_sorted[0]

    def _fit(self, X, y):
        """Public method for starting the search with the training data (X, y)

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]

        Returns
        -------
        None
        """
        self.start_time = time.time()
        self.results = {}

        if self._core_.n_jobs == 1:
            self._run_one_job(X, y)
        else:
            self._run_multiple_jobs(X, y)
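
As the docstring above notes, _fit expects X with shape [n_samples, n_features] and y with shape [n_samples] (or [n_samples, n_outputs]). A minimal illustration of those shapes with synthetic data; the optimizer instance itself is omitted, since constructing _core_ and _arg_ happens outside this file.

import numpy as np

X = np.random.rand(100, 4)             # 100 samples, 4 features
y = np.random.randint(0, 2, size=100)  # one label per sample
# A concrete optimizer subclass would then be driven via optimizer._fit(X, y)  (hypothetical call).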