hyperactive.base_optimizer.BaseOptimizer._run_multiple_jobs() - Code Metrics - Inspection of "v1.1.0" - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 507ec3...bdda8d )

by Simon

created 2019-10-08 17:15 UTC

BaseOptimizer._run_multiple_jobs() B

↳ Parent: hyperactive.base_optimizer

Complexity

Conditions

Size

Total Lines	33
Code Lines	25

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	25
dl	0
loc	33
rs	8.8133
c	0
b	0
f	0
cc	5
nop	3

# Author: Simon Blanke
# Email: [email protected]
# License: MIT License


import numpy as np
import multiprocessing

from functools import partial

from .base_positioner import BasePositioner
from .util import initialize_search, finish_search_, sort_for_best
from meta_learn import HyperactiveWrapper


class BaseOptimizer:
    def __init__(self, _config_, _arg_):

        """

        Parameters
        ----------

        search_config: dict
            A dictionary providing the model and hyperparameter search space for the
            optimization process.
        n_iter: int
            The number of iterations the optimizer performs.
        metric: string, optional (default: "accuracy")
            The metric the model is evaluated by.
        n_jobs: int, optional (default: 1)
            The number of searches to run in parallel.
        cv: int, optional (default: 3)
            The number of folds for the cross validation.
        verbosity: int, optional (default: 1)
            Verbosity level. 1 prints out warm_start points and their scores.
        random_state: int, optional (default: None)
            Sets the random seed.
        warm_start: dict, optional (default: False)
            Dictionary that definies a start point for the optimizer.
        memory: bool, optional (default: True)
            A memory, that saves the evaluation during the optimization to save time when
            optimizer returns to position.
        scatter_init: int, optional (default: False)
            Defines the number n of random positions that should be evaluated with 1/n the
            training data, to find a better initial position.

        Returns
        -------
        None

        """

        self._config_ = _config_
        self._arg_ = _arg_

        self.search_config = self._config_.search_config
        self.n_iter = self._config_.n_iter

        if self._config_.meta_learn:
            self._meta_ = HyperactiveWrapper(self._config_.search_config)

        if self._config_.get_search_path:
            self.pos_list = []
            self.score_list = []

    def _hill_climb_iteration(self, _cand_, _p_, X, y):
        _p_.pos_new = _p_.move_climb(_cand_, _p_.pos_current)
        _p_.score_new = _cand_.eval_pos(_p_.pos_new, X, y)

        if _p_.score_new > _cand_.score_best:
            _cand_, _p_ = self._update_pos(_cand_, _p_)

        return _cand_, _p_

    def _init_base_positioner(self, _cand_, positioner=None, pos_para={}):
        if positioner:
            _p_ = positioner(**pos_para)
        else:
            _p_ = BasePositioner(**pos_para)

        _p_.pos_current = _cand_.pos_best
        _p_.score_current = _cand_.score_best

        return _p_

    def _update_pos(self, _cand_, _p_):
        _cand_.pos_best = _p_.pos_new
        _cand_.score_best = _p_.score_new

        _p_.pos_current = _p_.pos_new
        _p_.score_current = _p_.score_new

        return _cand_, _p_

    def search(self, nth_process, X, y):
        """Run one complete optimization loop and return the final candidate.

        Parameters
        ----------
        nth_process : int
            Index of this search run; forwarded to ``initialize_search``.
        X, y
            Training data, forwarded to the initialisation, iteration and
            finishing helpers.

        Returns
        -------
        object
            The candidate returned by ``finish_search_``.

        Notes
        -----
        ``_init_opt_positioner`` and ``_iterate`` are not defined in this
        class; they are expected to be supplied by the concrete optimizer.
        When ``get_search_path`` is enabled, one array of positions and one
        array of scores is appended to ``self.pos_list`` /
        ``self.score_list`` per iteration.
        """
        self._config_, _cand_ = initialize_search(self._config_, nth_process, X, y)
        _p_ = self._init_opt_positioner(_cand_, X, y)

        for i in range(self._config_.n_iter):
            _cand_ = self._iterate(i, _cand_, _p_, X, y)
            self._config_.update_p_bar(1, _cand_)

            if self._config_.get_search_path:
                # Treat a single positioner like a one-element population so
                # both cases share one code path. Building the arrays after
                # collecting all entries also keeps an empty population from
                # hitting an undefined (or stale) array.
                positioners = _p_ if isinstance(_p_, list) else [_p_]

                self.pos_list.append(np.array([p.pos_new for p in positioners]))
                self.score_list.append(
                    np.array([p.score_new for p in positioners])
                )

        _cand_ = finish_search_(self._config_, _cand_, X, y)

        return _cand_

    def _search_multiprocessing(self, X, y):
        """Run ``search`` once per process index in a worker pool.

        Each entry of ``self._config_._n_process_range`` is passed to
        ``search`` as the process number, with ``X`` and ``y`` bound as
        keyword arguments.

        Returns
        -------
        list
            One candidate per process index, in the order of
            ``_n_process_range``.
        """
        search = partial(self.search, X=X, y=y)

        # ``map`` blocks until every result is available, so shutting the
        # pool down on leaving the ``with`` block is safe and stops worker
        # processes from outliving the call.
        with multiprocessing.Pool(self._config_.n_jobs) as pool:
            _cand_list = pool.map(search, self._config_._n_process_range)

        return _cand_list

    def _run_one_job(self, X, y):
        _cand_ = self.search(0, X, y)

        self.model_best = _cand_.model_best
        self.score_best = _cand_.score_best
        start_point = _cand_._get_warm_start()

        if self._config_.verbosity:
            print("\nscore       =", self.score_best)
            print("start_point =", start_point)

        if self._config_.meta_learn:
            self._meta_.collect(X, y, _cand_list=[_cand_])

    def _run_multiple_jobs(self, X, y):
        """Run the parallel searches and keep the best result.

        Gathers model, score and warm-start point from every candidate
        returned by ``_search_multiprocessing``, orders them with
        ``sort_for_best``, optionally prints the ordered start points, and
        stores the first entries on ``self.score_best`` / ``self.model_best``.

        Returns
        -------
        None
        """
        candidates = self._search_multiprocessing(X, y)

        models, scores, start_points = [], [], []
        for cand in candidates:
            models.append(cand.model_best)
            scores.append(cand.score_best)
            start_points.append(cand._get_warm_start())

        # ``sort_for_best`` is called once per payload list; the scores that
        # are kept come from the second call, as in the original code.
        start_points_sorted, _ = sort_for_best(start_points, scores)
        models_sorted, scores_sorted = sort_for_best(models, scores)

        if self._config_.verbosity:
            # Vertical spacing before the report, scaled with the job count.
            for _ in range(int(self._config_.n_jobs / 2)):
                print("\n")
            print("\nList of start points (best first):\n")
            for point, score in zip(start_points_sorted, scores_sorted):
                print("score       =", score)
                print("start_point =", point, "\n")

        # NOTE(review): unlike _run_one_job, nothing is passed to
        # self._meta_.collect here even when meta_learn is enabled --
        # confirm whether that is intentional.
        self.score_best = scores_sorted[0]
        self.model_best = models_sorted[0]

    def _fit(self, X, y):
        """Public method for starting the search with the training data (X, y)

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]

        Returns
        -------
        None
        """

        if self._config_.n_jobs == 1:
            self._run_one_job(X, y)
        else:
            self._run_multiple_jobs(X, y)


1			# Author: Simon Blanke
2			# Email: [email protected]
3			# License: MIT License
4
5
6			import numpy as np
7			import multiprocessing
8
9			from functools import partial
10
11			from .base_positioner import BasePositioner
12			from .util import initialize_search, finish_search_, sort_for_best
13			from meta_learn import HyperactiveWrapper
14
15
16			class BaseOptimizer:
17			def __init__(self, _config_, _arg_):
18
19			"""
20
21			Parameters
22			----------
23
24			search_config: dict
25			A dictionary providing the model and hyperparameter search space for the
26			optimization process.
27			n_iter: int
28			The number of iterations the optimizer performs.
29			metric: string, optional (default: "accuracy")
30			The metric the model is evaluated by.
31			n_jobs: int, optional (default: 1)
32			The number of searches to run in parallel.
33			cv: int, optional (default: 3)
34			The number of folds for the cross validation.
35			verbosity: int, optional (default: 1)
36			Verbosity level. 1 prints out warm_start points and their scores.
37			random_state: int, optional (default: None)
38			Sets the random seed.
39			warm_start: dict, optional (default: False)
40			Dictionary that definies a start point for the optimizer.
41			memory: bool, optional (default: True)
42			A memory, that saves the evaluation during the optimization to save time when
43			optimizer returns to position.
44			scatter_init: int, optional (default: False)
45			Defines the number n of random positions that should be evaluated with 1/n the
46			training data, to find a better initial position.
47
48			Returns
49			-------
50			None
51
52			"""
53
54			self._config_ = _config_
55			self._arg_ = _arg_
56
57			self.search_config = self._config_.search_config
58			self.n_iter = self._config_.n_iter
59
60			if self._config_.meta_learn:
61			self._meta_ = HyperactiveWrapper(self._config_.search_config)
62
63			if self._config_.get_search_path:
64			self.pos_list = []
65			self.score_list = []
66
67			def _hill_climb_iteration(self, _cand_, _p_, X, y):
68			_p_.pos_new = _p_.move_climb(_cand_, _p_.pos_current)
69			_p_.score_new = _cand_.eval_pos(_p_.pos_new, X, y)
70
71			if _p_.score_new > _cand_.score_best:
72			_cand_, _p_ = self._update_pos(_cand_, _p_)
73
74			return _cand_, _p_
75
76			def _init_base_positioner(self, _cand_, positioner=None, pos_para={}):
77			if positioner:
78			_p_ = positioner(**pos_para)
79			else:
80			_p_ = BasePositioner(**pos_para)
81
82			_p_.pos_current = _cand_.pos_best
83			_p_.score_current = _cand_.score_best
84
85			return _p_
86
87			def _update_pos(self, _cand_, _p_):
88			_cand_.pos_best = _p_.pos_new
89			_cand_.score_best = _p_.score_new
90
91			_p_.pos_current = _p_.pos_new
92			_p_.score_current = _p_.score_new
93
94			return _cand_, _p_
95
96			def search(self, nth_process, X, y):
97			self._config_, _cand_ = initialize_search(self._config_, nth_process, X, y)
98			_p_ = self._init_opt_positioner(_cand_, X, y)
99
100			for i in range(self._config_.n_iter):
101			_cand_ = self._iterate(i, _cand_, _p_, X, y)
102			self._config_.update_p_bar(1, _cand_)
103
104			if self._config_.get_search_path:
105			pos_list = []
106			score_list = []
107			if isinstance(_p_, list):
108			for p in _p_:
109			pos_list.append(p.pos_new)
110			score_list.append(p.score_new)
111
112			pos_list_ = np.array(pos_list)
113			score_list_ = np.array(score_list)
114
115			self.pos_list.append(pos_list_)
			Issue (introduced 2019-09-08 11:07 UTC): The variable `pos_list_` does not seem to be defined for all execution paths.
116			self.score_list.append(score_list_)
			Issue (introduced 2019-09-08 11:07 UTC): The variable `score_list_` does not seem to be defined for all execution paths.
117			else:
118			pos_list.append(_p_.pos_new)
119			score_list.append(_p_.score_new)
120
121			pos_list_ = np.array(pos_list)
122			score_list_ = np.array(score_list)
123
124			self.pos_list.append(pos_list_)
125			self.score_list.append(score_list_)
126
127			_cand_ = finish_search_(self._config_, _cand_, X, y)
128
129			return _cand_
130
131			def _search_multiprocessing(self, X, y):
132			"""Wrapper for the parallel search. Passes integer that corresponds to process number"""
133			pool = multiprocessing.Pool(self._config_.n_jobs)
134			search = partial(self.search, X=X, y=y)
135
136			_cand_list = pool.map(search, self._config_._n_process_range)
137
138			return _cand_list
139
140			def _run_one_job(self, X, y):
141			_cand_ = self.search(0, X, y)
142
143			self.model_best = _cand_.model_best
144			self.score_best = _cand_.score_best
145			start_point = _cand_._get_warm_start()
146
147			if self._config_.verbosity:
148			print("\nscore =", self.score_best)
149			print("start_point =", start_point)
150
151			if self._config_.meta_learn:
152			self._meta_.collect(X, y, _cand_list=[_cand_])
153
154			def _run_multiple_jobs(self, X, y):
155			_cand_list = self._search_multiprocessing(X, y)
156
157			start_point_list = []
158			score_best_list = []
159			model_best_list = []
160			for _cand_ in _cand_list:
161			model_best = _cand_.model_best
162			score_best = _cand_.score_best
163			start_point = _cand_._get_warm_start()
164
165			start_point_list.append(start_point)
166			score_best_list.append(score_best)
167			model_best_list.append(model_best)
168
169			start_point_sorted, score_best_sorted = sort_for_best(
170			start_point_list, score_best_list
171			)
172
173			model_best_sorted, score_best_sorted = sort_for_best(
174			model_best_list, score_best_list
175			)
176
177			if self._config_.verbosity:
178			for i in range(int(self._config_.n_jobs / 2)):
179			print("\n")
180			print("\nList of start points (best first):\n")
181			for start_point, score_best in zip(start_point_sorted, score_best_sorted):
182			print("score =", score_best)
183			print("start_point =", start_point, "\n")
184
185			self.score_best = score_best_sorted[0]
186			self.model_best = model_best_sorted[0]
187
188			def _fit(self, X, y):
189			"""Public method for starting the search with the training data (X, y)
190
191			Parameters
192			----------
193			X : array-like or sparse matrix of shape = [n_samples, n_features]
194
195			y : array-like, shape = [n_samples] or [n_samples, n_outputs]
196
197			Returns
198			-------
199			None
200			"""
201
202			if self._config_.n_jobs == 1:
203			self._run_one_job(X, y)
204			else:
205			self._run_multiple_jobs(X, y)
206

SimonBlanke / Hyperactive

Push — master ( 507ec3...bdda8d )

BaseOptimizer._run_multiple_jobs() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like